# install.packages("epitools")
# install.packages("dplyr")
# BiocManager::install("multtest") # install bioconductor package "multtest" that contains "golub" data
# install.packages("outliers")
# install.packages("lmtest")
# install.packages("sandwich")
# install.packages("glmnet")
# BiocManager::install("ROCR")
# BiocManager::install("CMA")
# install.packages("randomForest")
# install.packages("survival")
# install.packages("KMsurv")
# install.packages("glmnet")
# install.packages("penalized")
# install.packages("PerformanceAnalytics")
# install.packages("corrr")
# install.packages("dplyr")
# install.packages("psych")
# install.packages("corrplot")
# install.packages("GGally")
# install.packages("ggcorrplot")
# BiocManager::install("multtest")
# install.packages(c("factoextra", "dendextend"))
# BiocManager::install("ComplexHeatmap")
# install.packages("caret")
# install.packages("FactoMineR")
# install.packages("klaR")
# install.packages("cba")
# install.packages("factoextra")
# install.packages("lmtest")
# install.packages("tidyverse")
# BiocManager::install("CMA")
# install.packages("randomForest")
# BiocManager::install("Biobase")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(survival)
library(KMsurv)
library(caret)
## Warning: package 'caret' was built under R version 4.4.1
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
library(klaR) #For kmode
## Warning: package 'klaR' was built under R version 4.4.1
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(cba) #For ROCK
## Warning: package 'cba' was built under R version 4.4.1
## Loading required package: grid
## Loading required package: proxy
##
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
##
## as.matrix
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
library(CMA)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'CMA'
## The following objects are masked from 'package:caret':
##
## best, rfe
library(Biobase)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.1
## corrplot 0.94 loaded
library(tinytex)
## Warning: package 'tinytex' was built under R version 4.4.1
# QUESTION 1: Describe the main characteristics of the dataset: perform a univariate descriptive analysis of the first 6 variables.
# Read the whitespace-delimited text file (first line holds the column names)
# with read.table, assigning the result to the data frame viral34.
viral34 <- read.table("viral.34.txt", header = TRUE, sep = "")
# Confirm that the import produced a data frame.
class(viral34)
## [1] "data.frame"
# Show each column's type together with its first few values.
str(viral34)
## 'data.frame': 140 obs. of 57 variables:
## $ infection : int 0 1 0 1 1 1 0 0 1 0 ...
## $ stime : num 7.3 6.72 7 9.33 3.44 ...
## $ sind : int 0 0 1 0 1 0 0 1 1 0 ...
## $ gender : int 0 1 1 0 1 0 1 1 0 0 ...
## $ hosp : int 1 0 1 1 0 0 0 0 1 0 ...
## $ age : int 47 47 38 45 31 41 48 47 38 44 ...
## $ ancestry : chr "A" "A" "B" "A" ...
## $ GSTM3 : num 0.1465 -0.0354 -0.2626 0.3379 0.0966 ...
## $ RP5.860F19.3 : num -0.098 -0.021 0.0108 0.3417 0.0782 ...
## $ BBC3 : num 0.3082 -0.0964 0.0885 0.3277 -0.4061 ...
## $ MMP9 : num -0.2064 0.2415 -0.0258 -0.3414 -0.1786 ...
## $ Contig35251_RC: num -0.519 -0.532 -0.34 -0.626 -0.643 ...
## $ Contig40831_RC: num -0.11515 -0.00337 -0.08097 -0.12634 0.08476 ...
## $ ALDH4A1 : num 0.1667 0.0985 0.0671 0.603 0.1597 ...
## $ SERF1A : num -0.0838 0.1067 0.0627 -0.5238 -0.0179 ...
## $ SCUBE2 : num 0.00606 0.09757 -0.12583 0.08587 -0.14412 ...
## $ MTDH : num -0.1378 0.4942 0.0723 -0.577 -0.5194 ...
## $ DCK : num -0.336 -0.58 0.139 -0.525 -0.197 ...
## $ FLT1 : num -0.0559 0.169 0.067 -0.0304 0.0635 ...
## $ PECI.1 : num -0.0878 -0.0968 -0.1134 0.1088 -0.1864 ...
## $ QSCN6L1 : num -0.1246 0.2656 0.0957 -0.0828 -0.082 ...
## $ DIAPH3 : num 0.0808 0.1249 0.234 -0.1774 0.1007 ...
## $ SLC2A3 : num 0.3686 0.4642 -0.0776 -0.2203 -0.0472 ...
## $ GPR180 : num -0.04521 -0.18754 -0.00541 0.11594 -0.11199 ...
## $ RTN4RL1 : num -0.1629 0.2001 0.1219 -0.6442 0.0282 ...
## $ Contig32125_RC: num -0.0133 -0.0133 -0.0788 -0.0236 -0.1042 ...
## $ STK32B : num 0.0278 0.1297 -0.062 0.0129 -0.1214 ...
## $ EXT1 : num -0.1345 -0.1956 -0.1134 0.0219 -0.3168 ...
## $ COL4A2 : num -0.0299 -0.2267 -0.2083 0.1027 -0.2578 ...
## $ PECI : num 0.1735 0.212 0.0423 0.4796 0.1005 ...
## $ GNAZ : num 0.0705 -0.0317 0.063 0.3349 -0.1467 ...
## $ AYTL2 : num 0.2393 0.0157 -0.1276 0.5336 0.1171 ...
## $ Contig63649_RC: num 0.02962 0.00419 0.0504 0.29442 0.02671 ...
## $ RAB6B : num 0.4614 0.0186 -0.1425 0.3254 -0.0873 ...
## $ AA555029_RC : num -0.0481 0.1593 0.1142 -0.3106 -0.2201 ...
## $ GPR126 : num -0.1002 0.2812 0.0571 0.1912 -0.0826 ...
## $ ECT2 : num 0.0354 -0.0377 -0.1813 0.0334 0.3287 ...
## $ NUSAP1 : num 0.1098 0.0323 -0.0482 0.6553 0.0795 ...
## $ GMPS : num 0.2181 0.1857 0.0404 0.2371 0.1784 ...
## $ UCHL5 : num -0.0381 -0.2708 -0.0432 -0.1923 -0.1409 ...
## $ ORC6L : num 0.173 0.102 -0.15 0.193 0.126 ...
## $ TSPYL5 : num 0.1559 -0.0588 0.0909 0.541 0.0456 ...
## $ MELK : num -0.3458 -0.0108 -0.1366 -0.3397 -0.2484 ...
## $ RUNDC1 : num 0.55836 -0.35885 0.12992 -0.07181 -0.00765 ...
## $ DIAPH3.1 : num -0.4446 -0.2426 -0.0564 -0.4456 -0.0968 ...
## $ C16orf61 : num 0.0591 -0.0502 -0.2737 0.1355 0.2048 ...
## $ TGFB3 : num 0.0818 0.1187 0.1132 -0.0205 0.1275 ...
## $ FGF18 : num -0.0482 0.2738 0.0347 0.1039 0.1898 ...
## $ CDC42BPA : num 0.192 0.1254 -0.2281 0.0328 -0.0739 ...
## $ DTL : num -1.012 -0.146 0.448 -1.077 -0.843 ...
## $ WISP1 : num -0.00498 0.21792 0.07126 -0.44042 0.11942 ...
## $ DIAPH3.2 : num -0.29778 0.02057 -0.14414 0.05123 0.00824 ...
## $ OXCT1 : num 0.0314 0.1633 0.0569 -0.2054 -0.139 ...
## $ ZNF533 : num 0.8648 0.0158 -0.1476 -0.2065 0.2885 ...
## $ RFC4 : num 0.0619 0.0169 -0.0543 0.3945 0.0624 ...
## $ KNTC2 : num 0.5975 -0.3272 0.0965 0.046 -0.0926 ...
## $ FBXO31 : num -0.0414 -0.1352 0.0352 0.0607 0.264 ...
# Dimensions of the data: 140 observations (rows) and 57 variables (columns).
dim(viral34)
## [1] 140 57
nrow(viral34)
## [1] 140
ncol(viral34)
## [1] 57
# List the column names (note that they are all lowercase for the first seven).
names(viral34)
## [1] "infection" "stime" "sind" "gender"
## [5] "hosp" "age" "ancestry" "GSTM3"
## [9] "RP5.860F19.3" "BBC3" "MMP9" "Contig35251_RC"
## [13] "Contig40831_RC" "ALDH4A1" "SERF1A" "SCUBE2"
## [17] "MTDH" "DCK" "FLT1" "PECI.1"
## [21] "QSCN6L1" "DIAPH3" "SLC2A3" "GPR180"
## [25] "RTN4RL1" "Contig32125_RC" "STK32B" "EXT1"
## [29] "COL4A2" "PECI" "GNAZ" "AYTL2"
## [33] "Contig63649_RC" "RAB6B" "AA555029_RC" "GPR126"
## [37] "ECT2" "NUSAP1" "GMPS" "UCHL5"
## [41] "ORC6L" "TSPYL5" "MELK" "RUNDC1"
## [45] "DIAPH3.1" "C16orf61" "TGFB3" "FGF18"
## [49] "CDC42BPA" "DTL" "WISP1" "DIAPH3.2"
## [53] "OXCT1" "ZNF533" "RFC4" "KNTC2"
## [57] "FBXO31"
# Preview the first six rows of every column.
head(viral34)
## infection stime sind gender hosp age ancestry GSTM3 RP5.860F19.3
## 1 0 7.296372 0 0 1 47 A 0.14647630 -0.09803689
## 2 1 6.718686 0 1 0 47 A -0.03543524 -0.02103562
## 3 0 6.995209 1 1 1 38 B -0.26258909 0.01080372
## 4 1 9.330595 0 0 1 45 A 0.33787726 0.34173748
## 5 1 3.438741 1 1 0 31 A 0.09657176 0.07818674
## 6 1 15.329227 0 0 0 41 A -0.21568976 -0.02222821
## BBC3 MMP9 Contig35251_RC Contig40831_RC ALDH4A1 SERF1A
## 1 0.30821656 -0.20635196 -0.5190545 -0.115149133 0.16674915 -0.08378990
## 2 -0.09643536 0.24147416 -0.5319210 -0.003368632 0.09845308 0.10674758
## 3 0.08854258 -0.02584139 -0.3400320 -0.080972535 0.06714804 0.06265814
## 4 0.32773524 -0.34135513 -0.6258146 -0.126344331 0.60304554 -0.52384308
## 5 -0.40614429 -0.17861930 -0.6432336 0.084764382 0.15974585 -0.01786414
## 6 0.25438095 0.22220903 0.5438846 0.155627613 -0.11070614 -0.15845941
## SCUBE2 MTDH DCK FLT1 PECI.1 QSCN6L1
## 1 0.006056118 -0.13776980 -0.3355029 -0.05592013 -0.08777178 -0.12461618
## 2 0.097569969 0.49423377 -0.5800370 0.16900027 -0.09680589 0.26558747
## 3 -0.125834721 0.07226344 0.1389293 0.06697535 -0.11336268 0.09573915
## 4 0.085869554 -0.57695111 -0.5250435 -0.03036967 0.10875631 -0.08282874
## 5 -0.144122253 -0.51943819 -0.1974320 0.06349948 -0.18639378 -0.08196639
## 6 0.003581842 0.01179057 -0.5446215 -0.01398411 -0.29762197 0.25657503
## DIAPH3 SLC2A3 GPR180 RTN4RL1 Contig32125_RC STK32B
## 1 0.08084737 0.36857538 -0.045212204 -0.16287206 -0.01326674 0.02778352
## 2 0.12485686 0.46424057 -0.187539926 0.20005740 -0.01326124 0.12973975
## 3 0.23404700 -0.07758043 -0.005409022 0.12189144 -0.07876585 -0.06204672
## 4 -0.17743971 -0.22028177 0.115940757 -0.64421170 -0.02356354 0.01292946
## 5 0.10069529 -0.04723501 -0.111992419 0.02815844 -0.10416021 -0.12139572
## 6 -0.03541076 -0.05243454 0.117939467 0.38596644 -0.16397529 -0.11000363
## EXT1 COL4A2 PECI GNAZ AYTL2 Contig63649_RC
## 1 -0.13445797 -0.02990369 0.17349207 0.07047200 0.23928807 0.029619073
## 2 -0.19559405 -0.22673707 0.21204866 -0.03168261 0.01566120 0.004188184
## 3 -0.11337939 -0.20833506 0.04232249 0.06296012 -0.12755049 0.050402671
## 4 0.02194074 0.10271000 0.47963136 0.33487679 0.53361497 0.294420521
## 5 -0.31679313 -0.25780916 0.10048933 -0.14666447 0.11709402 0.026705563
## 6 -0.12637540 -0.35700047 -0.10518681 -0.20496584 -0.05130563 -0.303730416
## RAB6B AA555029_RC GPR126 ECT2 NUSAP1 GMPS
## 1 0.46141386 -0.04808210 -0.10022007 0.03544526 0.10981625 0.21805322
## 2 0.01856611 0.15926624 0.28115470 -0.03772432 0.03225047 0.18573594
## 3 -0.14251272 0.11420782 0.05710594 -0.18130437 -0.04820767 0.04043471
## 4 0.32544391 -0.31064082 0.19116150 0.03338104 0.65528392 0.23712422
## 5 -0.08731065 -0.22007577 -0.08256859 0.32874172 0.07952344 0.17836256
## 6 -0.27515282 0.05199264 -0.16971181 -0.21921949 -0.27156456 -0.33843161
## UCHL5 ORC6L TSPYL5 MELK RUNDC1 DIAPH3.1
## 1 -0.03809348 0.1728180 0.15589646 -0.34581318 0.558363335 -0.44455761
## 2 -0.27078994 0.1017376 -0.05882551 -0.01081727 -0.358850367 -0.24259102
## 3 -0.04321627 -0.1501144 0.09089323 -0.13659874 0.129923754 -0.05644101
## 4 -0.19231373 0.1926975 0.54098979 -0.33968909 -0.071808178 -0.44559038
## 5 -0.14087134 0.1256798 0.04560219 -0.24841783 -0.007654665 -0.09683179
## 6 0.13085033 -0.3269674 -0.21852037 -0.13351906 -0.495218907 0.20965383
## C16orf61 TGFB3 FGF18 CDC42BPA DTL WISP1
## 1 0.05912505 0.08180754 -0.04819787 0.19203352 -1.0115741 -0.004976858
## 2 -0.05018147 0.11869773 0.27382112 0.12535125 -0.1460709 0.217921322
## 3 -0.27369996 0.11315389 0.03470079 -0.22807824 0.4482309 0.071255809
## 4 0.13548833 -0.02046981 0.10391225 0.03281893 -1.0765161 -0.440424509
## 5 0.20482626 0.12745233 0.18982466 -0.07390664 -0.8427116 0.119424302
## 6 -0.17109518 0.26659311 -0.19909908 -0.26596006 0.7104907 -0.166179324
## DIAPH3.2 OXCT1 ZNF533 RFC4 KNTC2 FBXO31
## 1 -0.297776741 0.03135030 0.86482037 0.06185603 0.59748058 -0.04140661
## 2 0.020572007 0.16334775 0.01575178 0.01687964 -0.32724674 -0.13521580
## 3 -0.144136303 0.05694880 -0.14760060 -0.05427720 0.09654722 0.03522958
## 4 0.051227778 -0.20543872 -0.20651322 0.39446170 0.04598343 0.06070769
## 5 0.008235576 -0.13898008 0.28849255 0.06241235 -0.09261277 0.26401621
## 6 0.121529650 0.08704478 -0.15575775 -0.19665862 -0.18365899 -0.01361086
# Per-column summary: min, quartiles, mean and max for the numeric columns;
# length/class/mode for the character column.
summary(viral34)
## infection stime sind gender
## Min. :0.0000 Min. : 0.05476 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 4.69541 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median : 6.96235 Median :0.0000 Median :1.0000
## Mean :0.5071 Mean : 7.35621 Mean :0.3357 Mean :0.5571
## 3rd Qu.:1.0000 3rd Qu.:10.05681 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :17.65914 Max. :1.0000 Max. :1.0000
## hosp age ancestry GSTM3
## Min. :0.0000 Min. :26.00 Length:140 Min. :-0.359446
## 1st Qu.:0.0000 1st Qu.:41.00 Class :character 1st Qu.:-0.145519
## Median :0.0000 Median :45.00 Mode :character Median :-0.020332
## Mean :0.4786 Mean :44.25 Mean : 0.005313
## 3rd Qu.:1.0000 3rd Qu.:49.00 3rd Qu.: 0.123288
## Max. :1.0000 Max. :53.00 Max. : 0.556137
## RP5.860F19.3 BBC3 MMP9 Contig35251_RC
## Min. :-0.424157 Min. :-1.08275 Min. :-0.49427 Min. :-0.91770
## 1st Qu.:-0.107249 1st Qu.:-0.33332 1st Qu.:-0.16053 1st Qu.:-0.59254
## Median : 0.008689 Median :-0.09531 Median :-0.04761 Median :-0.40266
## Mean : 0.015576 Mean :-0.11296 Mean :-0.03699 Mean :-0.25165
## 3rd Qu.: 0.103068 3rd Qu.: 0.11098 3rd Qu.: 0.08797 3rd Qu.: 0.04371
## Max. : 0.593821 Max. : 0.60179 Max. : 0.51679 Max. : 0.99436
## Contig40831_RC ALDH4A1 SERF1A SCUBE2
## Min. :-0.471530 Min. :-0.767944 Min. :-0.556292 Min. :-0.51521
## 1st Qu.:-0.125633 1st Qu.:-0.174898 1st Qu.:-0.098369 1st Qu.:-0.12915
## Median : 0.027046 Median :-0.004138 Median : 0.004863 Median :-0.02263
## Mean : 0.005541 Mean :-0.027698 Mean :-0.007046 Mean :-0.02425
## 3rd Qu.: 0.122544 3rd Qu.: 0.137834 3rd Qu.: 0.089994 3rd Qu.: 0.07491
## Max. : 0.418517 Max. : 0.603046 Max. : 0.356074 Max. : 0.43717
## MTDH DCK FLT1 PECI.1
## Min. :-0.67564 Min. :-0.9087 Min. :-0.4825872 Min. :-0.43361
## 1st Qu.:-0.29327 1st Qu.:-0.5287 1st Qu.:-0.1008469 1st Qu.:-0.13963
## Median :-0.08343 Median :-0.3398 Median : 0.0188510 Median :-0.04026
## Mean :-0.08674 Mean :-0.3213 Mean :-0.0005165 Mean :-0.03362
## 3rd Qu.: 0.07384 3rd Qu.:-0.1596 3rd Qu.: 0.0896944 3rd Qu.: 0.05882
## Max. : 0.64056 Max. : 0.5985 Max. : 0.5082785 Max. : 0.51284
## QSCN6L1 DIAPH3 SLC2A3
## Min. :-0.379444 Min. :-0.449314 Min. :-0.3715558
## 1st Qu.:-0.046621 1st Qu.:-0.112040 1st Qu.:-0.0777269
## Median : 0.007762 Median :-0.005755 Median : 0.0005181
## Mean : 0.021679 Mean :-0.010889 Mean : 0.0113789
## 3rd Qu.: 0.098100 3rd Qu.: 0.099199 3rd Qu.: 0.0805766
## Max. : 0.540118 Max. : 0.354887 Max. : 0.4642406
## GPR180 RTN4RL1 Contig32125_RC STK32B
## Min. :-0.35519 Min. :-0.664571 Min. :-0.532111 Min. :-0.48045
## 1st Qu.:-0.08033 1st Qu.:-0.205543 1st Qu.:-0.113477 1st Qu.:-0.14288
## Median :-0.02057 Median : 0.004592 Median :-0.009005 Median :-0.02346
## Mean :-0.01371 Mean :-0.041391 Mean :-0.011050 Mean :-0.04124
## 3rd Qu.: 0.05980 3rd Qu.: 0.131846 3rd Qu.: 0.073370 3rd Qu.: 0.04487
## Max. : 0.33055 Max. : 0.428095 Max. : 0.456306 Max. : 0.45805
## EXT1 COL4A2 PECI GNAZ
## Min. :-0.47784 Min. :-0.59870 Min. :-0.44234 Min. :-0.31745
## 1st Qu.:-0.16753 1st Qu.:-0.19791 1st Qu.:-0.19421 1st Qu.:-0.09565
## Median :-0.05578 Median :-0.05285 Median :-0.06374 Median :-0.01636
## Mean :-0.05193 Mean :-0.05964 Mean :-0.03729 Mean : 0.01008
## 3rd Qu.: 0.06052 3rd Qu.: 0.06271 3rd Qu.: 0.09660 3rd Qu.: 0.08337
## Max. : 0.37411 Max. : 0.56018 Max. : 0.60898 Max. : 0.43061
## AYTL2 Contig63649_RC RAB6B AA555029_RC
## Min. :-0.69430 Min. :-0.365412 Min. :-0.56918 Min. :-0.430735
## 1st Qu.:-0.13194 1st Qu.:-0.098367 1st Qu.:-0.14308 1st Qu.:-0.159998
## Median :-0.04600 Median :-0.024872 Median :-0.05221 Median :-0.001041
## Mean :-0.02517 Mean :-0.009363 Mean :-0.01720 Mean :-0.020952
## 3rd Qu.: 0.06544 3rd Qu.: 0.090043 3rd Qu.: 0.08955 3rd Qu.: 0.107535
## Max. : 0.53361 Max. : 0.320536 Max. : 0.49465 Max. : 0.820083
## GPR126 ECT2 NUSAP1 GMPS
## Min. :-0.37971 Min. :-0.50768 Min. :-0.586304 Min. :-0.59153
## 1st Qu.:-0.13606 1st Qu.:-0.23113 1st Qu.:-0.160713 1st Qu.:-0.28408
## Median :-0.01046 Median :-0.08127 Median :-0.009314 Median :-0.04513
## Mean :-0.01639 Mean :-0.05000 Mean :-0.002911 Mean :-0.06046
## 3rd Qu.: 0.09784 3rd Qu.: 0.09838 3rd Qu.: 0.150407 3rd Qu.: 0.15284
## Max. : 0.43925 Max. : 0.77567 Max. : 0.676529 Max. : 0.55193
## UCHL5 ORC6L TSPYL5 MELK
## Min. :-0.45852 Min. :-0.79678 Min. :-0.67892 Min. :-0.78982
## 1st Qu.:-0.13107 1st Qu.:-0.21396 1st Qu.:-0.17860 1st Qu.:-0.18946
## Median :-0.03862 Median :-0.02437 Median :-0.02444 Median :-0.06113
## Mean :-0.02417 Mean :-0.05166 Mean :-0.03200 Mean :-0.04928
## 3rd Qu.: 0.09208 3rd Qu.: 0.15011 3rd Qu.: 0.13126 3rd Qu.: 0.07438
## Max. : 0.56070 Max. : 0.50672 Max. : 0.61785 Max. : 0.81893
## RUNDC1 DIAPH3.1 C16orf61 TGFB3
## Min. :-0.8704 Min. :-0.76818 Min. :-0.61186 Min. :-0.415229
## 1st Qu.:-0.3306 1st Qu.:-0.25637 1st Qu.:-0.18891 1st Qu.:-0.092384
## Median :-0.1184 Median :-0.06829 Median :-0.09306 Median :-0.005316
## Mean :-0.1059 Mean :-0.05389 Mean :-0.05912 Mean :-0.002289
## 3rd Qu.: 0.1037 3rd Qu.: 0.11787 3rd Qu.: 0.05866 3rd Qu.: 0.082730
## Max. : 0.7527 Max. : 0.70489 Max. : 0.59408 Max. : 0.439666
## FGF18 CDC42BPA DTL WISP1
## Min. :-0.597786 Min. :-0.44439 Min. :-1.2645 Min. :-0.44042
## 1st Qu.:-0.140422 1st Qu.:-0.15187 1st Qu.:-0.6506 1st Qu.:-0.08759
## Median : 0.001504 Median :-0.04357 Median :-0.1533 Median : 0.02402
## Mean :-0.023152 Mean :-0.02640 Mean :-0.2095 Mean : 0.01312
## 3rd Qu.: 0.106955 3rd Qu.: 0.08044 3rd Qu.: 0.2034 3rd Qu.: 0.12234
## Max. : 0.482246 Max. : 0.48422 Max. : 0.8919 Max. : 0.37552
## DIAPH3.2 OXCT1 ZNF533
## Min. :-0.4510200 Min. :-0.427838 Min. :-0.51090
## 1st Qu.:-0.1220947 1st Qu.:-0.090491 1st Qu.:-0.26128
## Median : 0.0088287 Median : 0.009548 Median :-0.13802
## Mean :-0.0009119 Mean : 0.016115 Mean :-0.05926
## 3rd Qu.: 0.1126542 3rd Qu.: 0.123381 3rd Qu.: 0.03807
## Max. : 0.3668805 Max. : 0.649058 Max. : 0.86482
## RFC4 KNTC2 FBXO31
## Min. :-0.5635877 Min. :-0.43109 Min. :-0.42152
## 1st Qu.:-0.0824637 1st Qu.:-0.18407 1st Qu.:-0.13880
## Median :-0.0009982 Median :-0.06158 Median :-0.04505
## Mean : 0.0080165 Mean :-0.03585 Mean :-0.02535
## 3rd Qu.: 0.1044821 3rd Qu.: 0.07221 3rd Qu.: 0.08601
## Max. : 0.4790691 Max. : 0.59748 Max. : 0.55562
#Based on these preliminary function calls, the data frame matches the data described in the problem statement, as follows:
#The data correspond to a follow-up study of 140 patients suffering from acute diarrhea of different infectious etiologies.
#The main goal of this study is to identify a biomarker signature for discriminating viral from bacterial infections.
# Variables:
# infection: Indicator of viral infection:
# (1 = viral infection; 0 = bacterial infection)
# stime: Time with symptoms (days).
# sind: Indicator of symptoms:
# (1 = symptoms finished; 0 = symptoms remain)
# gender: (1 = male, 0 = female).
# hosp: Indicator of hospitalization (1 = hospitalization, 0 = no hospitalization).
# age: Patient age at diagnosis (years).
# ancestry: Three different ancestry groups (A, B, C)
# Columns from 8 to 57: Gene expression measurements of 50 genes
# Use a significance level alpha=0.05 for each individual test and multiple testing
# correction whenever necessary.
# Check that the age column is numeric.
# Column names are case-sensitive: the column is `age`, whereas `viral34$Age`
# is NULL, for which is.numeric() returns FALSE regardless of the data.
is.numeric(viral34$age)
## [1] TRUE
# Check for missing values in the data.
# anyNA() gives a single TRUE/FALSE answer for the whole data frame; the
# element-wise matrix from is.na() is still printed afterwards so that
# individual cells can be inspected.
anyNA(viral34)
## [1] FALSE
is.na(viral34)
## infection stime sind gender hosp age ancestry GSTM3 RP5.860F19.3
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [75,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [83,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [84,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [96,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [98,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [102,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [103,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [107,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [115,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [119,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## BBC3 MMP9 Contig35251_RC Contig40831_RC ALDH4A1 SERF1A SCUBE2 MTDH
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [75,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [83,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [84,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [96,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [98,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [102,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [103,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [107,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [115,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [119,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## DCK FLT1 PECI.1 QSCN6L1 DIAPH3 SLC2A3 GPR180 RTN4RL1 Contig32125_RC
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [75,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [83,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [84,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [96,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [98,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [102,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [103,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [107,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [115,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [119,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## STK32B EXT1 COL4A2 PECI GNAZ AYTL2 Contig63649_RC RAB6B AA555029_RC
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [75,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [83,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [84,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [96,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [98,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [102,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [103,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [107,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [115,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [119,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## GPR126 ECT2 NUSAP1 GMPS UCHL5 ORC6L TSPYL5 MELK RUNDC1 DIAPH3.1
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [75,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [83,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [84,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [96,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [98,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [102,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [103,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [107,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [115,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [119,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## C16orf61 TGFB3 FGF18 CDC42BPA DTL WISP1 DIAPH3.2 OXCT1 ZNF533 RFC4
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [75,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [83,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [84,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [96,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [98,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [102,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [103,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [107,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [115,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [119,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## KNTC2 FBXO31
## [1,] FALSE FALSE
## [2,] FALSE FALSE
## [3,] FALSE FALSE
## [4,] FALSE FALSE
## [5,] FALSE FALSE
## [6,] FALSE FALSE
## [7,] FALSE FALSE
## [8,] FALSE FALSE
## [9,] FALSE FALSE
## [10,] FALSE FALSE
## [11,] FALSE FALSE
## [12,] FALSE FALSE
## [13,] FALSE FALSE
## [14,] FALSE FALSE
## [15,] FALSE FALSE
## [16,] FALSE FALSE
## [17,] FALSE FALSE
## [18,] FALSE FALSE
## [19,] FALSE FALSE
## [20,] FALSE FALSE
## [21,] FALSE FALSE
## [22,] FALSE FALSE
## [23,] FALSE FALSE
## [24,] FALSE FALSE
## [25,] FALSE FALSE
## [26,] FALSE FALSE
## [27,] FALSE FALSE
## [28,] FALSE FALSE
## [29,] FALSE FALSE
## [30,] FALSE FALSE
## [31,] FALSE FALSE
## [32,] FALSE FALSE
## [33,] FALSE FALSE
## [34,] FALSE FALSE
## [35,] FALSE FALSE
## [36,] FALSE FALSE
## [37,] FALSE FALSE
## [38,] FALSE FALSE
## [39,] FALSE FALSE
## [40,] FALSE FALSE
## [41,] FALSE FALSE
## [42,] FALSE FALSE
## [43,] FALSE FALSE
## [44,] FALSE FALSE
## [45,] FALSE FALSE
## [46,] FALSE FALSE
## [47,] FALSE FALSE
## [48,] FALSE FALSE
## [49,] FALSE FALSE
## [50,] FALSE FALSE
## [51,] FALSE FALSE
## [52,] FALSE FALSE
## [53,] FALSE FALSE
## [54,] FALSE FALSE
## [55,] FALSE FALSE
## [56,] FALSE FALSE
## [57,] FALSE FALSE
## [58,] FALSE FALSE
## [59,] FALSE FALSE
## [60,] FALSE FALSE
## [61,] FALSE FALSE
## [62,] FALSE FALSE
## [63,] FALSE FALSE
## [64,] FALSE FALSE
## [65,] FALSE FALSE
## [66,] FALSE FALSE
## [67,] FALSE FALSE
## [68,] FALSE FALSE
## [69,] FALSE FALSE
## [70,] FALSE FALSE
## [71,] FALSE FALSE
## [72,] FALSE FALSE
## [73,] FALSE FALSE
## [74,] FALSE FALSE
## [75,] FALSE FALSE
## [76,] FALSE FALSE
## [77,] FALSE FALSE
## [78,] FALSE FALSE
## [79,] FALSE FALSE
## [80,] FALSE FALSE
## [81,] FALSE FALSE
## [82,] FALSE FALSE
## [83,] FALSE FALSE
## [84,] FALSE FALSE
## [85,] FALSE FALSE
## [86,] FALSE FALSE
## [87,] FALSE FALSE
## [88,] FALSE FALSE
## [89,] FALSE FALSE
## [90,] FALSE FALSE
## [91,] FALSE FALSE
## [92,] FALSE FALSE
## [93,] FALSE FALSE
## [94,] FALSE FALSE
## [95,] FALSE FALSE
## [96,] FALSE FALSE
## [97,] FALSE FALSE
## [98,] FALSE FALSE
## [99,] FALSE FALSE
## [100,] FALSE FALSE
## [101,] FALSE FALSE
## [102,] FALSE FALSE
## [103,] FALSE FALSE
## [104,] FALSE FALSE
## [105,] FALSE FALSE
## [106,] FALSE FALSE
## [107,] FALSE FALSE
## [108,] FALSE FALSE
## [109,] FALSE FALSE
## [110,] FALSE FALSE
## [111,] FALSE FALSE
## [112,] FALSE FALSE
## [113,] FALSE FALSE
## [114,] FALSE FALSE
## [115,] FALSE FALSE
## [116,] FALSE FALSE
## [117,] FALSE FALSE
## [118,] FALSE FALSE
## [119,] FALSE FALSE
## [120,] FALSE FALSE
## [121,] FALSE FALSE
## [122,] FALSE FALSE
## [123,] FALSE FALSE
## [124,] FALSE FALSE
## [125,] FALSE FALSE
## [126,] FALSE FALSE
## [127,] FALSE FALSE
## [128,] FALSE FALSE
## [129,] FALSE FALSE
## [130,] FALSE FALSE
## [131,] FALSE FALSE
## [132,] FALSE FALSE
## [133,] FALSE FALSE
## [134,] FALSE FALSE
## [135,] FALSE FALSE
## [136,] FALSE FALSE
## [137,] FALSE FALSE
## [138,] FALSE FALSE
## [139,] FALSE FALSE
## [140,] FALSE FALSE
# Count the missing values across the whole data set.
sum(is.na(viral34))
## [1] 0
# Nothing is missing, so dropping incomplete rows is left disabled:
#viral34_o <- na.omit(viral34)
# Standardize the gene expression values (columns 8 to 57): every column is
# centered on its mean and divided by its standard deviation.
viral34_s <- scale(viral34[, 8:57], center = TRUE, scale = TRUE)
dim(viral34_s)
## [1] 140 50
#ncol(viral34_s)
#head(viral34_s)
# # TEST SCALING
# gs<-(viral34$GSTM3-mean(viral34$GSTM3))/sd(viral34$GSTM3)
# head(gs)
# bs<-(viral34$BBC3-mean(viral34$BBC3))/sd(viral34$BBC3)
# head(bs)
# Log-transform the gene expression values (columns 8 to 57). Each value is
# shifted by 3 before the natural log is taken; presumably the shift keeps
# every argument positive, because the raw expression values include
# negative numbers.
viral34_l <- log(viral34[, 8:57] + 3)
dim(viral34_l)
## [1] 140 50
head(viral34_l)
## GSTM3 RP5.860F19.3 BBC3 MMP9 Contig35251_RC Contig40831_RC
## 1 1.146283 1.065387 1.1964092 1.0273483 0.9086397 1.059473
## 2 1.086730 1.091576 1.0659392 1.1760282 0.9034401 1.097489
## 3 1.007013 1.102207 1.1276993 1.0899612 0.9783141 1.071251
## 4 1.205335 1.206491 1.2022920 0.9778165 0.8646544 1.055585
## 5 1.130296 1.124341 0.9531455 1.0372264 0.8572905 1.126475
## 6 1.024000 1.091175 1.1800021 1.1700672 1.2652235 1.149187
## ALDH4A1 SERF1A SCUBE2 MTDH DCK FLT1 PECI.1 QSCN6L1
## 1 1.152706 1.0702849 1.100629 1.0516011 0.9800153 1.079796 1.0689185 1.056186
## 2 1.130903 1.1335764 1.130618 1.2511141 0.8837522 1.153416 1.0658115 1.183440
## 3 1.120748 1.1192832 1.055762 1.1224146 1.1438818 1.120692 1.0600923 1.130027
## 4 1.281779 0.9067077 1.126833 0.8850266 0.9062228 1.088437 1.1342227 1.070614
## 5 1.150492 1.0926398 1.049379 0.9084851 1.0305361 1.119558 1.0344670 1.070910
## 6 1.061012 1.0443464 1.099806 1.1025348 0.8982809 1.093940 0.9941321 1.180676
## DIAPH3 SLC2A3 GPR180 RTN4RL1 Contig32125_RC STK32B EXT1
## 1 1.125205 1.214490 1.083427 1.0427923 1.094180 1.107831 1.0527575
## 2 1.139388 1.242493 1.034060 1.1631687 1.094182 1.140950 1.0311917
## 3 1.173734 1.072412 1.096808 1.1384390 1.072006 1.077713 1.0600865
## 4 1.037644 1.022350 1.136531 0.8568754 1.090727 1.102913 1.1058993
## 5 1.131626 1.082742 1.060567 1.1079547 1.063275 1.057306 0.9870127
## 6 1.086738 1.080980 1.137172 1.2196394 1.042403 1.061255 1.0555742
## COL4A2 PECI GNAZ AYTL2 Contig63649_RC RAB6B AA555029_RC
## 1 1.0885944 1.154833 1.121831 1.175354 1.1084369 1.241677 1.0824551
## 2 1.0200246 1.166909 1.087995 1.103819 1.1000074 1.104782 1.1503398
## 3 1.0266382 1.112621 1.119382 1.055165 1.1152736 1.049943 1.1359748
## 4 1.1322759 1.246926 1.204436 1.262321 1.1922303 1.201603 0.9893029
## 5 1.0087572 1.131560 1.048489 1.136901 1.1074748 1.069077 1.0224237
## 6 0.9719145 1.062921 1.027844 1.081362 0.9918692 1.002412 1.1157947
## GPR126 ECT2 NUSAP1 GMPS UCHL5 ORC6L TSPYL5 MELK
## 1 1.064635 1.110358 1.134564 1.1687766 1.085833 1.1546201 1.149273 0.9761383
## 2 1.188195 1.085958 1.109305 1.1586833 1.004012 1.1319625 1.078809 1.0950000
## 3 1.117469 1.036274 1.082413 1.1120005 1.084102 1.0472789 1.128460 1.0520102
## 4 1.160385 1.109678 1.296174 1.1746853 1.032361 1.1608662 1.264406 0.9784430
## 5 1.070704 1.202594 1.124775 1.1563661 1.050517 1.1396518 1.113699 1.0121761
## 6 1.040379 1.022732 1.003728 0.9789156 1.141305 0.9832136 1.022983 1.0530851
## RUNDC1 DIAPH3.1 C16orf61 TGFB3 FGF18 CDC42BPA DTL WISP1
## 1 1.2693007 0.9382254 1.118129 1.125516 1.082416 1.160658 0.6873433 1.0969520
## 2 0.9712143 1.0142915 1.081744 1.137416 1.185958 1.139547 1.0486967 1.1687356
## 3 1.1410086 1.0796194 1.002945 1.135636 1.110113 1.019541 1.2378613 1.1220865
## 4 1.0743851 0.9378211 1.142785 1.091766 1.132663 1.109493 0.6541381 0.9398414
## 5 1.0960575 1.0658026 1.164658 1.140219 1.159966 1.073668 0.7688520 1.1376485
## 6 0.9182013 1.1661631 1.039890 1.183748 1.029941 1.005780 1.3111641 1.0416259
## DIAPH3.2 OXCT1 ZNF533 RFC4 KNTC2 FBXO31
## 1 0.9940749 1.109008 1.351915 1.119021 1.2802338 1.084714
## 2 1.1054462 1.151631 1.103849 1.104223 0.9831091 1.052493
## 3 1.0493743 1.117417 1.048161 1.080354 1.1302877 1.110287
## 4 1.1155441 1.027675 1.027291 1.222145 1.1138238 1.118646
## 5 1.1013537 1.051178 1.190429 1.119203 1.0672548 1.182958
## 6 1.1383232 1.127214 1.045297 1.030812 1.0354385 1.094065
#Plain log() and log1p() could not be applied directly: some expression values are negative, so NaNs were produced (attempts and warning kept below for reference):
#viral34[, 8:57] <- log1p(viral34[8:57]) #computes log(1+x)
#viral34[,8:57] <- lapply(viral34[,8:57], log)
#warnings()
#50: In lapply(X = x, FUN = .Generic, ...) : NaNs produced
# #Test Log transformation
# log_col1<-log(3+viral34$GSTM3)
# head(log_col1)
# log_col2<-log(3+viral34$BBC3)
# head(log_col2)
# Build the combined data frame by placing three pieces side by side:
#   1. the original data frame (viral34),
#   2. the standardized gene expression values (viral34_s),
#   3. the log-transformed gene expression values (viral34_l).
# At this point each gene name appears three times among the column names.
viral34_c <- cbind(
  viral34,
  viral34_s,
  viral34_l
)
dim(viral34_c)
## [1] 140 157
head(viral34_c)
## infection stime sind gender hosp age ancestry GSTM3 RP5.860F19.3
## 1 0 7.296372 0 0 1 47 A 0.14647630 -0.09803689
## 2 1 6.718686 0 1 0 47 A -0.03543524 -0.02103562
## 3 0 6.995209 1 1 1 38 B -0.26258909 0.01080372
## 4 1 9.330595 0 0 1 45 A 0.33787726 0.34173748
## 5 1 3.438741 1 1 0 31 A 0.09657176 0.07818674
## 6 1 15.329227 0 0 0 41 A -0.21568976 -0.02222821
## BBC3 MMP9 Contig35251_RC Contig40831_RC ALDH4A1 SERF1A
## 1 0.30821656 -0.20635196 -0.5190545 -0.115149133 0.16674915 -0.08378990
## 2 -0.09643536 0.24147416 -0.5319210 -0.003368632 0.09845308 0.10674758
## 3 0.08854258 -0.02584139 -0.3400320 -0.080972535 0.06714804 0.06265814
## 4 0.32773524 -0.34135513 -0.6258146 -0.126344331 0.60304554 -0.52384308
## 5 -0.40614429 -0.17861930 -0.6432336 0.084764382 0.15974585 -0.01786414
## 6 0.25438095 0.22220903 0.5438846 0.155627613 -0.11070614 -0.15845941
## SCUBE2 MTDH DCK FLT1 PECI.1 QSCN6L1
## 1 0.006056118 -0.13776980 -0.3355029 -0.05592013 -0.08777178 -0.12461618
## 2 0.097569969 0.49423377 -0.5800370 0.16900027 -0.09680589 0.26558747
## 3 -0.125834721 0.07226344 0.1389293 0.06697535 -0.11336268 0.09573915
## 4 0.085869554 -0.57695111 -0.5250435 -0.03036967 0.10875631 -0.08282874
## 5 -0.144122253 -0.51943819 -0.1974320 0.06349948 -0.18639378 -0.08196639
## 6 0.003581842 0.01179057 -0.5446215 -0.01398411 -0.29762197 0.25657503
## DIAPH3 SLC2A3 GPR180 RTN4RL1 Contig32125_RC STK32B
## 1 0.08084737 0.36857538 -0.045212204 -0.16287206 -0.01326674 0.02778352
## 2 0.12485686 0.46424057 -0.187539926 0.20005740 -0.01326124 0.12973975
## 3 0.23404700 -0.07758043 -0.005409022 0.12189144 -0.07876585 -0.06204672
## 4 -0.17743971 -0.22028177 0.115940757 -0.64421170 -0.02356354 0.01292946
## 5 0.10069529 -0.04723501 -0.111992419 0.02815844 -0.10416021 -0.12139572
## 6 -0.03541076 -0.05243454 0.117939467 0.38596644 -0.16397529 -0.11000363
## EXT1 COL4A2 PECI GNAZ AYTL2 Contig63649_RC
## 1 -0.13445797 -0.02990369 0.17349207 0.07047200 0.23928807 0.029619073
## 2 -0.19559405 -0.22673707 0.21204866 -0.03168261 0.01566120 0.004188184
## 3 -0.11337939 -0.20833506 0.04232249 0.06296012 -0.12755049 0.050402671
## 4 0.02194074 0.10271000 0.47963136 0.33487679 0.53361497 0.294420521
## 5 -0.31679313 -0.25780916 0.10048933 -0.14666447 0.11709402 0.026705563
## 6 -0.12637540 -0.35700047 -0.10518681 -0.20496584 -0.05130563 -0.303730416
## RAB6B AA555029_RC GPR126 ECT2 NUSAP1 GMPS
## 1 0.46141386 -0.04808210 -0.10022007 0.03544526 0.10981625 0.21805322
## 2 0.01856611 0.15926624 0.28115470 -0.03772432 0.03225047 0.18573594
## 3 -0.14251272 0.11420782 0.05710594 -0.18130437 -0.04820767 0.04043471
## 4 0.32544391 -0.31064082 0.19116150 0.03338104 0.65528392 0.23712422
## 5 -0.08731065 -0.22007577 -0.08256859 0.32874172 0.07952344 0.17836256
## 6 -0.27515282 0.05199264 -0.16971181 -0.21921949 -0.27156456 -0.33843161
## UCHL5 ORC6L TSPYL5 MELK RUNDC1 DIAPH3.1
## 1 -0.03809348 0.1728180 0.15589646 -0.34581318 0.558363335 -0.44455761
## 2 -0.27078994 0.1017376 -0.05882551 -0.01081727 -0.358850367 -0.24259102
## 3 -0.04321627 -0.1501144 0.09089323 -0.13659874 0.129923754 -0.05644101
## 4 -0.19231373 0.1926975 0.54098979 -0.33968909 -0.071808178 -0.44559038
## 5 -0.14087134 0.1256798 0.04560219 -0.24841783 -0.007654665 -0.09683179
## 6 0.13085033 -0.3269674 -0.21852037 -0.13351906 -0.495218907 0.20965383
## C16orf61 TGFB3 FGF18 CDC42BPA DTL WISP1
## 1 0.05912505 0.08180754 -0.04819787 0.19203352 -1.0115741 -0.004976858
## 2 -0.05018147 0.11869773 0.27382112 0.12535125 -0.1460709 0.217921322
## 3 -0.27369996 0.11315389 0.03470079 -0.22807824 0.4482309 0.071255809
## 4 0.13548833 -0.02046981 0.10391225 0.03281893 -1.0765161 -0.440424509
## 5 0.20482626 0.12745233 0.18982466 -0.07390664 -0.8427116 0.119424302
## 6 -0.17109518 0.26659311 -0.19909908 -0.26596006 0.7104907 -0.166179324
## DIAPH3.2 OXCT1 ZNF533 RFC4 KNTC2 FBXO31
## 1 -0.297776741 0.03135030 0.86482037 0.06185603 0.59748058 -0.04140661
## 2 0.020572007 0.16334775 0.01575178 0.01687964 -0.32724674 -0.13521580
## 3 -0.144136303 0.05694880 -0.14760060 -0.05427720 0.09654722 0.03522958
## 4 0.051227778 -0.20543872 -0.20651322 0.39446170 0.04598343 0.06070769
## 5 0.008235576 -0.13898008 0.28849255 0.06241235 -0.09261277 0.26401621
## 6 0.121529650 0.08704478 -0.15575775 -0.19665862 -0.18365899 -0.01361086
## GSTM3 RP5.860F19.3 BBC3 MMP9 Contig35251_RC Contig40831_RC
## 1 0.7138813 -0.57440211 1.26618208 -0.81209763 -0.6159014 -0.67108894
## 2 -0.2060675 -0.18510052 0.04967577 1.33518865 -0.6455369 -0.04954233
## 3 -1.3548121 -0.02412778 0.60577550 0.05343442 -0.2035604 -0.48105268
## 4 1.6818192 1.64900082 1.32486112 -1.45942582 -0.8618013 -0.73333895
## 5 0.4615079 0.31654599 -0.88140315 -0.67912202 -0.9019223 0.44051430
## 6 -1.1176365 -0.19112999 1.10433590 1.24281408 1.8323579 0.83454368
## ALDH4A1 SERF1A SCUBE2 MTDH DCK FLT1
## 1 0.8726701 -0.44912514 0.1942805 -0.1821611 -0.05112308 -0.35139159
## 2 0.5661609 0.66594479 0.7808869 2.0740241 -0.93365927 1.07514078
## 3 0.4256655 0.40792311 -0.6511435 0.5676351 1.66112734 0.42805919
## 4 2.8307454 -3.02441939 0.7058869 -1.7499914 -0.73518462 -0.18934069
## 5 0.8412396 -0.06331218 -0.7683671 -1.5446764 0.44718207 0.40601389
## 6 -0.3725348 -0.88610863 0.1784203 0.3517534 -0.80584276 -0.08541711
## PECI.1 QSCN6L1 DIAPH3 SLC2A3 GPR180 RTN4RL1
## 1 -0.3325357 -1.0997776 0.5392279 2.2788093 -0.24330929 -0.4879262
## 2 -0.3880154 1.8335965 0.7979150 2.8891253 -1.34271285 0.9697711
## 3 -0.4896928 0.5567539 1.4397330 -0.5675343 0.06414844 0.6558194
## 4 0.8743696 -0.7856385 -0.9789797 -1.4779272 1.00150889 -2.4212151
## 5 -0.9381866 -0.7791558 0.6558937 -0.3739394 -0.75914981 0.2793431
## 6 -1.6212538 1.7658451 -0.1441357 -0.4071109 1.01694783 1.7164702
## Contig32125_RC STK32B EXT1 COL4A2 PECI GNAZ
## 1 -0.01428243 0.4260294 -0.4813926 0.1461046 0.9860751 0.3995737
## 2 -0.01424700 1.0552938 -0.8379918 -0.8211227 1.1664517 -0.2763188
## 3 -0.43623247 -0.1283947 -0.3584438 -0.7306964 0.3724338 0.3498723
## 4 -0.08061514 0.3343515 0.4308618 0.7977604 2.4182648 2.1489730
## 5 -0.59982477 -0.4946913 -1.5449311 -0.9738091 0.6445516 -1.0370811
## 6 -0.98515784 -0.4243804 -0.4342479 -1.4612293 -0.3176484 -1.4228244
## AYTL2 Contig63649_RC RAB6B AA555029_RC GPR126 ECT2
## 1 1.5729248 0.28173399 2.3501669 -0.1449979 -0.5035802 0.3578867
## 2 0.2428707 0.09793693 0.1756376 0.9631957 1.7873898 0.0514164
## 3 -0.6089022 0.43194361 -0.6153129 0.7223765 0.4414985 -0.5499677
## 4 3.3234779 2.19553759 1.6825094 -1.5482689 1.2467884 0.3492407
## 5 0.8461575 0.26067713 -0.3442524 -1.0642351 -0.3975453 1.5863570
## 6 -0.1554245 -2.12748792 -1.2666197 0.3898614 -0.9210265 -0.7087750
## NUSAP1 GMPS UCHL5 ORC6L TSPYL5 MELK RUNDC1
## 1 0.4808374 1.0797140 -0.08519916 0.8392967 0.7914942 -1.2786512 2.1740843
## 2 0.1499810 0.9544301 -1.50870499 0.5735413 -0.1130234 0.1658503 -0.8279555
## 3 -0.1932127 0.3911430 -0.11653751 -0.3680828 0.5176677 -0.3765191 0.7718018
## 4 2.8075265 1.1536463 -1.02863177 0.9136225 2.4137021 -1.2522442 0.1115334
## 5 0.3516236 0.9258458 -0.71393620 0.6630564 0.3268789 -0.8586827 0.3215078
## 6 -1.1459402 -1.0776023 0.94830393 -1.0293010 -0.7857389 -0.3632395 -1.2742896
## DIAPH3.1 C16orf61 TGFB3 FGF18 CDC42BPA DTL
## 1 -1.455677597 0.61186055 0.5877739 -0.1281211 1.2597867 -1.5254135
## 2 -0.703133863 0.04624479 0.8456102 1.5191381 0.8751973 0.1207287
## 3 -0.009523967 -1.11037043 0.8068628 0.2959395 -1.1632042 1.2510601
## 4 -1.459525754 1.00700876 -0.1270723 0.6499845 0.3415180 -1.6489300
## 5 -0.160023276 1.36580373 0.9067987 1.0894618 -0.2740209 -1.2042460
## 6 0.981966756 -0.57943337 1.8792943 -0.9000427 -1.3816873 1.7498645
## WISP1 DIAPH3.2 OXCT1 ZNF533 RFC4 KNTC2
## 1 -0.1128211 -1.82122539 0.0835812 3.2029626 0.34098731 3.1695391
## 2 1.2764815 0.13180112 0.8077063 0.2600142 0.05613357 -1.4582792
## 3 0.3623297 -0.87866209 0.2240121 -0.3061799 -0.39453158 0.6626070
## 4 -2.8269239 0.31987013 -1.2154201 -0.5103764 2.44751394 0.4095594
## 5 0.6625592 0.05611878 -0.8508345 1.2053583 0.34451067 -0.2840483
## 6 -1.1175804 0.75116261 0.3891157 -0.3344533 -1.29629087 -0.7396911
## FBXO31 GSTM3 RP5.860F19.3 BBC3 MMP9 Contig35251_RC
## 1 -0.09573504 1.146283 1.065387 1.1964092 1.0273483 0.9086397
## 2 -0.65490852 1.086730 1.091576 1.0659392 1.1760282 0.9034401
## 3 0.36107443 1.007013 1.102207 1.1276993 1.0899612 0.9783141
## 4 0.51294316 1.205335 1.206491 1.2022920 0.9778165 0.8646544
## 5 1.72481525 1.130296 1.124341 0.9531455 1.0372264 0.8572905
## 6 0.06994858 1.024000 1.091175 1.1800021 1.1700672 1.2652235
## Contig40831_RC ALDH4A1 SERF1A SCUBE2 MTDH DCK FLT1
## 1 1.059473 1.152706 1.0702849 1.100629 1.0516011 0.9800153 1.079796
## 2 1.097489 1.130903 1.1335764 1.130618 1.2511141 0.8837522 1.153416
## 3 1.071251 1.120748 1.1192832 1.055762 1.1224146 1.1438818 1.120692
## 4 1.055585 1.281779 0.9067077 1.126833 0.8850266 0.9062228 1.088437
## 5 1.126475 1.150492 1.0926398 1.049379 0.9084851 1.0305361 1.119558
## 6 1.149187 1.061012 1.0443464 1.099806 1.1025348 0.8982809 1.093940
## PECI.1 QSCN6L1 DIAPH3 SLC2A3 GPR180 RTN4RL1 Contig32125_RC
## 1 1.0689185 1.056186 1.125205 1.214490 1.083427 1.0427923 1.094180
## 2 1.0658115 1.183440 1.139388 1.242493 1.034060 1.1631687 1.094182
## 3 1.0600923 1.130027 1.173734 1.072412 1.096808 1.1384390 1.072006
## 4 1.1342227 1.070614 1.037644 1.022350 1.136531 0.8568754 1.090727
## 5 1.0344670 1.070910 1.131626 1.082742 1.060567 1.1079547 1.063275
## 6 0.9941321 1.180676 1.086738 1.080980 1.137172 1.2196394 1.042403
## STK32B EXT1 COL4A2 PECI GNAZ AYTL2 Contig63649_RC
## 1 1.107831 1.0527575 1.0885944 1.154833 1.121831 1.175354 1.1084369
## 2 1.140950 1.0311917 1.0200246 1.166909 1.087995 1.103819 1.1000074
## 3 1.077713 1.0600865 1.0266382 1.112621 1.119382 1.055165 1.1152736
## 4 1.102913 1.1058993 1.1322759 1.246926 1.204436 1.262321 1.1922303
## 5 1.057306 0.9870127 1.0087572 1.131560 1.048489 1.136901 1.1074748
## 6 1.061255 1.0555742 0.9719145 1.062921 1.027844 1.081362 0.9918692
## RAB6B AA555029_RC GPR126 ECT2 NUSAP1 GMPS UCHL5 ORC6L
## 1 1.241677 1.0824551 1.064635 1.110358 1.134564 1.1687766 1.085833 1.1546201
## 2 1.104782 1.1503398 1.188195 1.085958 1.109305 1.1586833 1.004012 1.1319625
## 3 1.049943 1.1359748 1.117469 1.036274 1.082413 1.1120005 1.084102 1.0472789
## 4 1.201603 0.9893029 1.160385 1.109678 1.296174 1.1746853 1.032361 1.1608662
## 5 1.069077 1.0224237 1.070704 1.202594 1.124775 1.1563661 1.050517 1.1396518
## 6 1.002412 1.1157947 1.040379 1.022732 1.003728 0.9789156 1.141305 0.9832136
## TSPYL5 MELK RUNDC1 DIAPH3.1 C16orf61 TGFB3 FGF18 CDC42BPA
## 1 1.149273 0.9761383 1.2693007 0.9382254 1.118129 1.125516 1.082416 1.160658
## 2 1.078809 1.0950000 0.9712143 1.0142915 1.081744 1.137416 1.185958 1.139547
## 3 1.128460 1.0520102 1.1410086 1.0796194 1.002945 1.135636 1.110113 1.019541
## 4 1.264406 0.9784430 1.0743851 0.9378211 1.142785 1.091766 1.132663 1.109493
## 5 1.113699 1.0121761 1.0960575 1.0658026 1.164658 1.140219 1.159966 1.073668
## 6 1.022983 1.0530851 0.9182013 1.1661631 1.039890 1.183748 1.029941 1.005780
## DTL WISP1 DIAPH3.2 OXCT1 ZNF533 RFC4 KNTC2 FBXO31
## 1 0.6873433 1.0969520 0.9940749 1.109008 1.351915 1.119021 1.2802338 1.084714
## 2 1.0486967 1.1687356 1.1054462 1.151631 1.103849 1.104223 0.9831091 1.052493
## 3 1.2378613 1.1220865 1.0493743 1.117417 1.048161 1.080354 1.1302877 1.110287
## 4 0.6541381 0.9398414 1.1155441 1.027675 1.027291 1.222145 1.1138238 1.118646
## 5 0.7688520 1.1376485 1.1013537 1.051178 1.190429 1.119203 1.0672548 1.182958
## 6 1.3111641 1.0416259 1.1383232 1.127214 1.045297 1.030812 1.0354385 1.094065
# Disambiguate the three copies of every gene column in viral34_c:
#   - the raw values keep the plain gene name,
#   - the standardized values (taken from viral34_s) get the suffix "_s",
#   - the log-transformed values (taken from viral34_l) get the suffix "_l".
# Column positions are derived from the widths of the objects that were bound
# together instead of being hard-coded, so the suffixes still land on the
# right columns if variables are added to or removed from the inputs.
stopifnot(
  ncol(viral34_c) == ncol(viral34) + ncol(viral34_s) + ncol(viral34_l)
)
std_cols <- ncol(viral34) + seq_len(ncol(viral34_s))
log_cols <- ncol(viral34) + ncol(viral34_s) + seq_len(ncol(viral34_l))
names(viral34_c)[std_cols] <- paste0(names(viral34_c)[std_cols], "_s")
names(viral34_c)[log_cols] <- paste0(names(viral34_c)[log_cols], "_l")
# FACTORING:
# Convert the four 0/1 coded variables into labelled factors. The columns and
# the label lists below are matched by position; within each label pair the
# first label belongs to code 0 and the second to code 1:
#   infection: bacterial_infection / viral_infection
#   sind:      symptoms_remain / symptoms_finished
#   gender:    female / male
#   hosp:      no_hospitalization / hospitalization
viral34_c[c("infection", "sind", "gender", "hosp")] <- Map(
  function(codes, level_labels) {
    factor(codes, levels = c(0, 1), labels = level_labels)
  },
  viral34_c[c("infection", "sind", "gender", "hosp")],
  list(
    c("bacterial_infection", "viral_infection"),
    c("symptoms_remain", "symptoms_finished"),
    c("female", "male"),
    c("no_hospitalization", "hospitalization")
  )
)
# Baseline data frame used by the subsequent univariate analysis:
summary(viral34_c)
## infection stime sind gender
## bacterial_infection:69 Min. : 0.05476 symptoms_remain :93 female:62
## viral_infection :71 1st Qu.: 4.69541 symptoms_finished:47 male :78
## Median : 6.96235
## Mean : 7.35621
## 3rd Qu.:10.05681
## Max. :17.65914
## hosp age ancestry GSTM3
## no_hospitalization:73 Min. :26.00 Length:140 Min. :-0.359446
## hospitalization :67 1st Qu.:41.00 Class :character 1st Qu.:-0.145519
## Median :45.00 Mode :character Median :-0.020332
## Mean :44.25 Mean : 0.005313
## 3rd Qu.:49.00 3rd Qu.: 0.123288
## Max. :53.00 Max. : 0.556137
## RP5.860F19.3 BBC3 MMP9 Contig35251_RC
## Min. :-0.424157 Min. :-1.08275 Min. :-0.49427 Min. :-0.91770
## 1st Qu.:-0.107249 1st Qu.:-0.33332 1st Qu.:-0.16053 1st Qu.:-0.59254
## Median : 0.008689 Median :-0.09531 Median :-0.04761 Median :-0.40266
## Mean : 0.015576 Mean :-0.11296 Mean :-0.03699 Mean :-0.25165
## 3rd Qu.: 0.103068 3rd Qu.: 0.11098 3rd Qu.: 0.08797 3rd Qu.: 0.04371
## Max. : 0.593821 Max. : 0.60179 Max. : 0.51679 Max. : 0.99436
## Contig40831_RC ALDH4A1 SERF1A SCUBE2
## Min. :-0.471530 Min. :-0.767944 Min. :-0.556292 Min. :-0.51521
## 1st Qu.:-0.125633 1st Qu.:-0.174898 1st Qu.:-0.098369 1st Qu.:-0.12915
## Median : 0.027046 Median :-0.004138 Median : 0.004863 Median :-0.02263
## Mean : 0.005541 Mean :-0.027698 Mean :-0.007046 Mean :-0.02425
## 3rd Qu.: 0.122544 3rd Qu.: 0.137834 3rd Qu.: 0.089994 3rd Qu.: 0.07491
## Max. : 0.418517 Max. : 0.603046 Max. : 0.356074 Max. : 0.43717
## MTDH DCK FLT1 PECI.1
## Min. :-0.67564 Min. :-0.9087 Min. :-0.4825872 Min. :-0.43361
## 1st Qu.:-0.29327 1st Qu.:-0.5287 1st Qu.:-0.1008469 1st Qu.:-0.13963
## Median :-0.08343 Median :-0.3398 Median : 0.0188510 Median :-0.04026
## Mean :-0.08674 Mean :-0.3213 Mean :-0.0005165 Mean :-0.03362
## 3rd Qu.: 0.07384 3rd Qu.:-0.1596 3rd Qu.: 0.0896944 3rd Qu.: 0.05882
## Max. : 0.64056 Max. : 0.5985 Max. : 0.5082785 Max. : 0.51284
## QSCN6L1 DIAPH3 SLC2A3
## Min. :-0.379444 Min. :-0.449314 Min. :-0.3715558
## 1st Qu.:-0.046621 1st Qu.:-0.112040 1st Qu.:-0.0777269
## Median : 0.007762 Median :-0.005755 Median : 0.0005181
## Mean : 0.021679 Mean :-0.010889 Mean : 0.0113789
## 3rd Qu.: 0.098100 3rd Qu.: 0.099199 3rd Qu.: 0.0805766
## Max. : 0.540118 Max. : 0.354887 Max. : 0.4642406
## GPR180 RTN4RL1 Contig32125_RC STK32B
## Min. :-0.35519 Min. :-0.664571 Min. :-0.532111 Min. :-0.48045
## 1st Qu.:-0.08033 1st Qu.:-0.205543 1st Qu.:-0.113477 1st Qu.:-0.14288
## Median :-0.02057 Median : 0.004592 Median :-0.009005 Median :-0.02346
## Mean :-0.01371 Mean :-0.041391 Mean :-0.011050 Mean :-0.04124
## 3rd Qu.: 0.05980 3rd Qu.: 0.131846 3rd Qu.: 0.073370 3rd Qu.: 0.04487
## Max. : 0.33055 Max. : 0.428095 Max. : 0.456306 Max. : 0.45805
## EXT1 COL4A2 PECI GNAZ
## Min. :-0.47784 Min. :-0.59870 Min. :-0.44234 Min. :-0.31745
## 1st Qu.:-0.16753 1st Qu.:-0.19791 1st Qu.:-0.19421 1st Qu.:-0.09565
## Median :-0.05578 Median :-0.05285 Median :-0.06374 Median :-0.01636
## Mean :-0.05193 Mean :-0.05964 Mean :-0.03729 Mean : 0.01008
## 3rd Qu.: 0.06052 3rd Qu.: 0.06271 3rd Qu.: 0.09660 3rd Qu.: 0.08337
## Max. : 0.37411 Max. : 0.56018 Max. : 0.60898 Max. : 0.43061
## AYTL2 Contig63649_RC RAB6B AA555029_RC
## Min. :-0.69430 Min. :-0.365412 Min. :-0.56918 Min. :-0.430735
## 1st Qu.:-0.13194 1st Qu.:-0.098367 1st Qu.:-0.14308 1st Qu.:-0.159998
## Median :-0.04600 Median :-0.024872 Median :-0.05221 Median :-0.001041
## Mean :-0.02517 Mean :-0.009363 Mean :-0.01720 Mean :-0.020952
## 3rd Qu.: 0.06544 3rd Qu.: 0.090043 3rd Qu.: 0.08955 3rd Qu.: 0.107535
## Max. : 0.53361 Max. : 0.320536 Max. : 0.49465 Max. : 0.820083
## GPR126 ECT2 NUSAP1 GMPS
## Min. :-0.37971 Min. :-0.50768 Min. :-0.586304 Min. :-0.59153
## 1st Qu.:-0.13606 1st Qu.:-0.23113 1st Qu.:-0.160713 1st Qu.:-0.28408
## Median :-0.01046 Median :-0.08127 Median :-0.009314 Median :-0.04513
## Mean :-0.01639 Mean :-0.05000 Mean :-0.002911 Mean :-0.06046
## 3rd Qu.: 0.09784 3rd Qu.: 0.09838 3rd Qu.: 0.150407 3rd Qu.: 0.15284
## Max. : 0.43925 Max. : 0.77567 Max. : 0.676529 Max. : 0.55193
## UCHL5 ORC6L TSPYL5 MELK
## Min. :-0.45852 Min. :-0.79678 Min. :-0.67892 Min. :-0.78982
## 1st Qu.:-0.13107 1st Qu.:-0.21396 1st Qu.:-0.17860 1st Qu.:-0.18946
## Median :-0.03862 Median :-0.02437 Median :-0.02444 Median :-0.06113
## Mean :-0.02417 Mean :-0.05166 Mean :-0.03200 Mean :-0.04928
## 3rd Qu.: 0.09208 3rd Qu.: 0.15011 3rd Qu.: 0.13126 3rd Qu.: 0.07438
## Max. : 0.56070 Max. : 0.50672 Max. : 0.61785 Max. : 0.81893
## RUNDC1 DIAPH3.1 C16orf61 TGFB3
## Min. :-0.8704 Min. :-0.76818 Min. :-0.61186 Min. :-0.415229
## 1st Qu.:-0.3306 1st Qu.:-0.25637 1st Qu.:-0.18891 1st Qu.:-0.092384
## Median :-0.1184 Median :-0.06829 Median :-0.09306 Median :-0.005316
## Mean :-0.1059 Mean :-0.05389 Mean :-0.05912 Mean :-0.002289
## 3rd Qu.: 0.1037 3rd Qu.: 0.11787 3rd Qu.: 0.05866 3rd Qu.: 0.082730
## Max. : 0.7527 Max. : 0.70489 Max. : 0.59408 Max. : 0.439666
## FGF18 CDC42BPA DTL WISP1
## Min. :-0.597786 Min. :-0.44439 Min. :-1.2645 Min. :-0.44042
## 1st Qu.:-0.140422 1st Qu.:-0.15187 1st Qu.:-0.6506 1st Qu.:-0.08759
## Median : 0.001504 Median :-0.04357 Median :-0.1533 Median : 0.02402
## Mean :-0.023152 Mean :-0.02640 Mean :-0.2095 Mean : 0.01312
## 3rd Qu.: 0.106955 3rd Qu.: 0.08044 3rd Qu.: 0.2034 3rd Qu.: 0.12234
## Max. : 0.482246 Max. : 0.48422 Max. : 0.8919 Max. : 0.37552
## DIAPH3.2 OXCT1 ZNF533
## Min. :-0.4510200 Min. :-0.427838 Min. :-0.51090
## 1st Qu.:-0.1220947 1st Qu.:-0.090491 1st Qu.:-0.26128
## Median : 0.0088287 Median : 0.009548 Median :-0.13802
## Mean :-0.0009119 Mean : 0.016115 Mean :-0.05926
## 3rd Qu.: 0.1126542 3rd Qu.: 0.123381 3rd Qu.: 0.03807
## Max. : 0.3668805 Max. : 0.649058 Max. : 0.86482
## RFC4 KNTC2 FBXO31 GSTM3_s
## Min. :-0.5635877 Min. :-0.43109 Min. :-0.42152 Min. :-1.8446
## 1st Qu.:-0.0824637 1st Qu.:-0.18407 1st Qu.:-0.13880 1st Qu.:-0.7628
## Median :-0.0009982 Median :-0.06158 Median :-0.04505 Median :-0.1297
## Mean : 0.0080165 Mean :-0.03585 Mean :-0.02535 Mean : 0.0000
## 3rd Qu.: 0.1044821 3rd Qu.: 0.07221 3rd Qu.: 0.08601 3rd Qu.: 0.5966
## Max. : 0.4790691 Max. : 0.59748 Max. : 0.55562 Max. : 2.7856
## RP5.860F19.3_s BBC3_s MMP9_s Contig35251_RC_s
## Min. :-2.22320 Min. :-2.91550 Min. :-2.19263 Min. :-1.5341
## 1st Qu.:-0.62098 1st Qu.:-0.66246 1st Qu.:-0.59238 1st Qu.:-0.7852
## Median :-0.03482 Median : 0.05306 Median :-0.05094 Median :-0.3478
## Mean : 0.00000 Mean : 0.00000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.44234 3rd Qu.: 0.67322 3rd Qu.: 0.59917 3rd Qu.: 0.6803
## Max. : 2.92348 Max. : 2.14875 Max. : 2.65532 Max. : 2.8699
## Contig40831_RC_s ALDH4A1_s SERF1A_s SCUBE2_s
## Min. :-2.6527 Min. :-3.3222 Min. :-3.21432 Min. :-3.14702
## 1st Qu.:-0.7294 1st Qu.:-0.6606 1st Qu.:-0.53444 1st Qu.:-0.67239
## Median : 0.1196 Median : 0.1057 Median : 0.06969 Median : 0.01043
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.6506 3rd Qu.: 0.7429 3rd Qu.: 0.56790 3rd Qu.: 0.63563
## Max. : 2.2963 Max. : 2.8307 Max. : 2.12506 Max. : 2.95771
## MTDH_s DCK_s FLT1_s PECI.1_s
## Min. :-2.10229 Min. :-2.11967 Min. :-3.0575 Min. :-2.45636
## 1st Qu.:-0.73728 1st Qu.:-0.74828 1st Qu.:-0.6363 1st Qu.:-0.65099
## Median : 0.01181 Median :-0.06646 Median : 0.1228 Median :-0.04077
## Mean : 0.00000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.57328 3rd Qu.: 0.58355 3rd Qu.: 0.5722 3rd Qu.: 0.56769
## Max. : 2.59638 Max. : 3.31971 Max. : 3.2270 Max. : 3.35592
## QSCN6L1_s DIAPH3_s SLC2A3_s GPR180_s
## Min. :-3.0155 Min. :-2.57705 Min. :-2.44301 Min. :-2.63771
## 1st Qu.:-0.5134 1st Qu.:-0.59456 1st Qu.:-0.56847 1st Qu.:-0.51461
## Median :-0.1046 Median : 0.03018 Median :-0.06929 Median :-0.05297
## Mean : 0.0000 Mean : 0.00000 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.5745 3rd Qu.: 0.64710 3rd Qu.: 0.44146 3rd Qu.: 0.56789
## Max. : 3.8974 Max. : 2.15003 Max. : 2.88913 Max. : 2.65928
## RTN4RL1_s Contig32125_RC_s STK32B_s EXT1_s
## Min. :-2.5030 Min. :-3.35671 Min. :-2.7107 Min. :-2.48431
## 1st Qu.:-0.6593 1st Qu.:-0.65984 1st Qu.:-0.6273 1st Qu.:-0.67431
## Median : 0.1847 Median : 0.01317 Median : 0.1098 Median :-0.02248
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.6958 3rd Qu.: 0.54383 3rd Qu.: 0.5315 3rd Qu.: 0.65590
## Max. : 1.8857 Max. : 3.01074 Max. : 3.0816 Max. : 2.48502
## COL4A2_s PECI_s GNAZ_s AYTL2_s
## Min. :-2.64893 Min. :-1.8949 Min. :-2.1671 Min. :-3.9797
## 1st Qu.:-0.67945 1st Qu.:-0.7341 1st Qu.:-0.6995 1st Qu.:-0.6350
## Median : 0.03336 Median :-0.1237 Median :-0.1749 Median :-0.1238
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.60120 3rd Qu.: 0.6264 3rd Qu.: 0.4849 3rd Qu.: 0.5389
## Max. : 3.04573 Max. : 3.0234 Max. : 2.7824 Max. : 3.3235
## Contig63649_RC_s RAB6B_s AA555029_RC_s GPR126_s
## Min. :-2.5733 Min. :-2.7104 Min. :-2.1901 Min. :-2.18251
## 1st Qu.:-0.6433 1st Qu.:-0.6181 1st Qu.:-0.7431 1st Qu.:-0.71889
## Median :-0.1121 Median :-0.1719 Median : 0.1064 Median : 0.03564
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.7184 3rd Qu.: 0.5242 3rd Qu.: 0.6867 3rd Qu.: 0.68622
## Max. : 2.3843 Max. : 2.5134 Max. : 4.4950 Max. : 2.73711
## ECT2_s NUSAP1_s GMPS_s UCHL5_s
## Min. :-1.9170 Min. :-2.48846 Min. :-2.05879 Min. :-2.65714
## 1st Qu.:-0.7587 1st Qu.:-0.67310 1st Qu.:-0.86692 1st Qu.:-0.65397
## Median :-0.1310 Median :-0.02731 Median : 0.05945 Median :-0.08845
## Mean : 0.0000 Mean : 0.00000 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.6215 3rd Qu.: 0.65398 3rd Qu.: 0.82692 3rd Qu.: 0.71113
## Max. : 3.4583 Max. : 2.89815 Max. : 2.37405 Max. : 3.57788
## ORC6L_s TSPYL5_s MELK_s RUNDC1_s
## Min. :-2.7858 Min. :-2.72517 Min. :-3.19321 Min. :-2.50218
## 1st Qu.:-0.6068 1st Qu.:-0.61758 1st Qu.:-0.60447 1st Qu.:-0.73547
## Median : 0.1021 Median : 0.03182 Median :-0.05112 Median :-0.04086
## Mean : 0.0000 Mean : 0.00000 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.7544 3rd Qu.: 0.68771 3rd Qu.: 0.53324 3rd Qu.: 0.68595
## Max. : 2.0877 Max. : 2.73746 Max. : 3.74371 Max. : 2.81010
## DIAPH3.1_s C16orf61_s TGFB3_s FGF18_s
## Min. :-2.66152 Min. :-2.8602 Min. :-2.88616 Min. :-2.9395
## 1st Qu.:-0.75446 1st Qu.:-0.6716 1st Qu.:-0.62970 1st Qu.:-0.5999
## Median :-0.05369 Median :-0.1757 Median :-0.02116 Median : 0.1261
## Mean : 0.00000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.63996 3rd Qu.: 0.6095 3rd Qu.: 0.59423 3rd Qu.: 0.6656
## Max. : 2.82725 Max. : 3.3800 Max. : 3.08895 Max. : 2.5853
## CDC42BPA_s DTL_s WISP1_s DIAPH3.2_s
## Min. :-2.41079 Min. :-2.0065 Min. :-2.82692 Min. :-2.76135
## 1st Qu.:-0.72369 1st Qu.:-0.8389 1st Qu.:-0.62776 1st Qu.:-0.74344
## Median :-0.09906 Median : 0.1071 Median : 0.06794 Median : 0.05976
## Mean : 0.00000 Mean : 0.0000 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.61617 3rd Qu.: 0.7855 3rd Qu.: 0.68072 3rd Qu.: 0.69671
## Max. : 2.94497 Max. : 2.0949 Max. : 2.25875 Max. : 2.25636
## OXCT1_s ZNF533_s RFC4_s KNTC2_s
## Min. :-2.43548 Min. :-1.5654 Min. :-3.62020 Min. :-1.9780
## 1st Qu.:-0.58483 1st Qu.:-0.7002 1st Qu.:-0.57305 1st Qu.:-0.7418
## Median :-0.03602 Median :-0.2730 Median :-0.05709 Median :-0.1287
## Mean : 0.00000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.58845 3rd Qu.: 0.3374 3rd Qu.: 0.61096 3rd Qu.: 0.5408
## Max. : 3.47227 Max. : 3.2030 Max. : 2.98337 Max. : 3.1695
## FBXO31_s GSTM3_l RP5.860F19.3_l BBC3_l
## Min. :-2.3615 Min. :0.971 Min. :0.9462 Min. :0.6509
## 1st Qu.:-0.6763 1st Qu.:1.049 1st Qu.:1.0622 1st Qu.:0.9808
## Median :-0.1175 Median :1.092 Median :1.1015 Median :1.0663
## Mean : 0.0000 Mean :1.098 Mean :1.1017 Mean :1.0533
## 3rd Qu.: 0.6638 3rd Qu.:1.139 3rd Qu.:1.1324 3rd Qu.:1.1349
## Max. : 3.4630 Max. :1.269 Max. :1.2792 Max. :1.2814
## MMP9_l Contig35251_RC_l Contig40831_RC_l ALDH4A1_l
## Min. :0.9186 Min. :0.7335 Min. :0.9276 Min. :0.8029
## 1st Qu.:1.0436 1st Qu.:0.8786 1st Qu.:1.0558 1st Qu.:1.0385
## Median :1.0826 Median :0.9545 Median :1.1076 Median :1.0972
## Mean :1.0837 Mean :0.9993 Mean :1.0987 Mean :1.0865
## 3rd Qu.:1.1275 3rd Qu.:1.1131 3rd Qu.:1.1386 3rd Qu.:1.1435
## Max. :1.2575 Max. :1.3849 Max. :1.2292 Max. :1.2818
## SERF1A_l SCUBE2_l MTDH_l DCK_l
## Min. :0.8935 Min. :0.9102 Min. :0.8434 Min. :0.7378
## 1st Qu.:1.0653 1st Qu.:1.0546 1st Qu.:0.9957 1st Qu.:0.9048
## Median :1.1002 Median :1.0910 Median :1.0704 Median :0.9784
## Mean :1.0946 Mean :1.0891 Mean :1.0647 Mean :0.9801
## 3rd Qu.:1.1282 3rd Qu.:1.1233 3rd Qu.:1.1229 3rd Qu.:1.0439
## Max. :1.2108 Max. :1.2346 Max. :1.2921 Max. :1.2805
## FLT1_l PECI.1_l QSCN6L1_l DIAPH3_l
## Min. :0.9232 Min. :0.9425 Min. :0.9634 Min. :0.9364
## 1st Qu.:1.0644 1st Qu.:1.0510 1st Qu.:1.0830 1st Qu.:1.0605
## Median :1.1049 Median :1.0851 Median :1.1012 Median :1.0967
## Mean :1.0971 Mean :1.0859 Mean :1.1049 Mean :1.0933
## 3rd Qu.:1.1281 3rd Qu.:1.1180 3rd Qu.:1.1308 3rd Qu.:1.1311
## Max. :1.2551 Max. :1.2564 Max. :1.2642 Max. :1.2104
## SLC2A3_l GPR180_l RTN4RL1_l Contig32125_RC_l
## Min. :0.9664 Min. :0.9726 Min. :0.8482 Min. :0.9034
## 1st Qu.:1.0724 1st Qu.:1.0715 1st Qu.:1.0276 1st Qu.:1.0601
## Median :1.0988 Median :1.0917 Median :1.1001 Median :1.0956
## Mean :1.1011 Mean :1.0931 Mean :1.0811 Mean :1.0936
## 3rd Qu.:1.1251 3rd Qu.:1.1184 3rd Qu.:1.1416 3rd Qu.:1.1228
## Max. :1.2425 Max. :1.2031 Max. :1.2320 Max. :1.2402
## STK32B_l EXT1_l COL4A2_l PECI_l
## Min. :0.9241 Min. :0.9251 Min. :0.876 Min. :0.9391
## 1st Qu.:1.0498 1st Qu.:1.0411 1st Qu.:1.030 1st Qu.:1.0317
## Median :1.0908 Median :1.0798 Median :1.081 Median :1.0771
## Mean :1.0833 Mean :1.0795 Mean :1.076 Mean :1.0836
## 3rd Qu.:1.1135 3rd Qu.:1.1186 3rd Qu.:1.119 3rd Qu.:1.1303
## Max. :1.2407 Max. :1.2161 Max. :1.270 Max. :1.2834
## GNAZ_l AYTL2_l Contig63649_RC_l RAB6B_l
## Min. :0.9868 Min. :0.8354 Min. :0.9687 Min. :0.8882
## 1st Qu.:1.0662 1st Qu.:1.0536 1st Qu.:1.0653 1st Qu.:1.0497
## Median :1.0931 Median :1.0832 Median :1.0903 Median :1.0811
## Mean :1.1007 Mean :1.0886 Mean :1.0944 Mean :1.0906
## 3rd Qu.:1.1260 3rd Qu.:1.1202 3rd Qu.:1.1282 3rd Qu.:1.1280
## Max. :1.2327 Max. :1.2623 Max. :1.2001 Max. :1.2512
## AA555029_RC_l GPR126_l ECT2_l NUSAP1_l
## Min. :0.9436 Min. :0.9633 Min. :0.9132 Min. :0.8812
## 1st Qu.:1.0438 1st Qu.:1.0522 1st Qu.:1.0184 1st Qu.:1.0436
## Median :1.0983 Median :1.0951 Median :1.0712 Median :1.0955
## Mean :1.0897 Mean :1.0916 Mean :1.0786 Mean :1.0946
## 3rd Qu.:1.1338 3rd Qu.:1.1307 3rd Qu.:1.1309 3rd Qu.:1.1475
## Max. :1.3403 Max. :1.2353 Max. :1.3286 Max. :1.3020
## GMPS_l UCHL5_l ORC6L_l TSPYL5_l
## Min. :0.8790 Min. :0.9327 Min. :0.7899 Min. :0.842
## 1st Qu.:0.9991 1st Qu.:1.0539 1st Qu.:1.0246 1st Qu.:1.037
## Median :1.0835 Median :1.0857 Median :1.0905 Median :1.090
## Mean :1.0744 Mean :1.0890 Mean :1.0770 Mean :1.085
## 3rd Qu.:1.1483 3rd Qu.:1.1288 3rd Qu.:1.1474 3rd Qu.:1.141
## Max. :1.2675 Max. :1.2700 Max. :1.2547 Max. :1.286
## MELK_l RUNDC1_l DIAPH3.1_l C16orf61_l
## Min. :0.7931 Min. :0.7559 Min. :0.8028 Min. :0.8705
## 1st Qu.:1.0334 1st Qu.:0.9819 1st Qu.:1.0093 1st Qu.:1.0336
## Median :1.0780 Median :1.0584 Median :1.0756 Median :1.0671
## Mean :1.0790 Mean :1.0572 Mean :1.0764 Mean :1.0766
## 3rd Qu.:1.1231 3rd Qu.:1.1326 3rd Qu.:1.1371 3rd Qu.:1.1180
## Max. :1.3400 Max. :1.3225 Max. :1.3097 Max. :1.2793
## TGFB3_l FGF18_l CDC42BPA_l DTL_l
## Min. :0.9496 Min. :0.8764 Min. :0.9383 Min. :0.5513
## 1st Qu.:1.0673 1st Qu.:1.0507 1st Qu.:1.0467 1st Qu.:0.8542
## Median :1.0968 Median :1.0991 Median :1.0840 Median :1.0462
## Mean :1.0967 Mean :1.0887 Mean :1.0881 Mean :1.0074
## 3rd Qu.:1.1258 3rd Qu.:1.1336 3rd Qu.:1.1251 3rd Qu.:1.1642
## Max. :1.2354 Max. :1.2477 Max. :1.2482 Max. :1.3589
## WISP1_l DIAPH3.2_l OXCT1_l ZNF533_l
## Min. :0.9398 Min. :0.9357 Min. :0.9447 Min. :0.9119
## 1st Qu.:1.0690 1st Qu.:1.0571 1st Qu.:1.0680 1st Qu.:1.0075
## Median :1.1066 Median :1.1016 Median :1.1018 Median :1.0515
## Mean :1.1015 Mean :1.0968 Mean :1.1022 Mean :1.0742
## 3rd Qu.:1.1386 3rd Qu.:1.1355 3rd Qu.:1.1389 3rd Qu.:1.1112
## Max. :1.2165 Max. :1.2140 Max. :1.2945 Max. :1.3519
## RFC4_l KNTC2_l FBXO31_l
## Min. :0.8905 Min. :0.9435 Min. :0.9472
## 1st Qu.:1.0707 1st Qu.:1.0353 1st Qu.:1.0512
## Median :1.0983 Median :1.0779 Median :1.0835
## Mean :1.0999 Mean :1.0844 Mean :1.0886
## 3rd Qu.:1.1328 3rd Qu.:1.1224 3rd Qu.:1.1269
## Max. :1.2468 Max. :1.2802 Max. :1.2685
# Preview the first six rows of the prepared data frame (6 is head()'s default).
head(viral34_c, n = 6L)
## infection stime sind gender hosp age
## 1 bacterial_infection 7.296372 symptoms_remain female hospitalization 47
## 2 viral_infection 6.718686 symptoms_remain male no_hospitalization 47
## 3 bacterial_infection 6.995209 symptoms_finished male hospitalization 38
## 4 viral_infection 9.330595 symptoms_remain female hospitalization 45
## 5 viral_infection 3.438741 symptoms_finished male no_hospitalization 31
## 6 viral_infection 15.329227 symptoms_remain female no_hospitalization 41
## ancestry GSTM3 RP5.860F19.3 BBC3 MMP9 Contig35251_RC
## 1 A 0.14647630 -0.09803689 0.30821656 -0.20635196 -0.5190545
## 2 A -0.03543524 -0.02103562 -0.09643536 0.24147416 -0.5319210
## 3 B -0.26258909 0.01080372 0.08854258 -0.02584139 -0.3400320
## 4 A 0.33787726 0.34173748 0.32773524 -0.34135513 -0.6258146
## 5 A 0.09657176 0.07818674 -0.40614429 -0.17861930 -0.6432336
## 6 A -0.21568976 -0.02222821 0.25438095 0.22220903 0.5438846
## Contig40831_RC ALDH4A1 SERF1A SCUBE2 MTDH DCK
## 1 -0.115149133 0.16674915 -0.08378990 0.006056118 -0.13776980 -0.3355029
## 2 -0.003368632 0.09845308 0.10674758 0.097569969 0.49423377 -0.5800370
## 3 -0.080972535 0.06714804 0.06265814 -0.125834721 0.07226344 0.1389293
## 4 -0.126344331 0.60304554 -0.52384308 0.085869554 -0.57695111 -0.5250435
## 5 0.084764382 0.15974585 -0.01786414 -0.144122253 -0.51943819 -0.1974320
## 6 0.155627613 -0.11070614 -0.15845941 0.003581842 0.01179057 -0.5446215
## FLT1 PECI.1 QSCN6L1 DIAPH3 SLC2A3 GPR180
## 1 -0.05592013 -0.08777178 -0.12461618 0.08084737 0.36857538 -0.045212204
## 2 0.16900027 -0.09680589 0.26558747 0.12485686 0.46424057 -0.187539926
## 3 0.06697535 -0.11336268 0.09573915 0.23404700 -0.07758043 -0.005409022
## 4 -0.03036967 0.10875631 -0.08282874 -0.17743971 -0.22028177 0.115940757
## 5 0.06349948 -0.18639378 -0.08196639 0.10069529 -0.04723501 -0.111992419
## 6 -0.01398411 -0.29762197 0.25657503 -0.03541076 -0.05243454 0.117939467
## RTN4RL1 Contig32125_RC STK32B EXT1 COL4A2 PECI
## 1 -0.16287206 -0.01326674 0.02778352 -0.13445797 -0.02990369 0.17349207
## 2 0.20005740 -0.01326124 0.12973975 -0.19559405 -0.22673707 0.21204866
## 3 0.12189144 -0.07876585 -0.06204672 -0.11337939 -0.20833506 0.04232249
## 4 -0.64421170 -0.02356354 0.01292946 0.02194074 0.10271000 0.47963136
## 5 0.02815844 -0.10416021 -0.12139572 -0.31679313 -0.25780916 0.10048933
## 6 0.38596644 -0.16397529 -0.11000363 -0.12637540 -0.35700047 -0.10518681
## GNAZ AYTL2 Contig63649_RC RAB6B AA555029_RC GPR126
## 1 0.07047200 0.23928807 0.029619073 0.46141386 -0.04808210 -0.10022007
## 2 -0.03168261 0.01566120 0.004188184 0.01856611 0.15926624 0.28115470
## 3 0.06296012 -0.12755049 0.050402671 -0.14251272 0.11420782 0.05710594
## 4 0.33487679 0.53361497 0.294420521 0.32544391 -0.31064082 0.19116150
## 5 -0.14666447 0.11709402 0.026705563 -0.08731065 -0.22007577 -0.08256859
## 6 -0.20496584 -0.05130563 -0.303730416 -0.27515282 0.05199264 -0.16971181
## ECT2 NUSAP1 GMPS UCHL5 ORC6L TSPYL5
## 1 0.03544526 0.10981625 0.21805322 -0.03809348 0.1728180 0.15589646
## 2 -0.03772432 0.03225047 0.18573594 -0.27078994 0.1017376 -0.05882551
## 3 -0.18130437 -0.04820767 0.04043471 -0.04321627 -0.1501144 0.09089323
## 4 0.03338104 0.65528392 0.23712422 -0.19231373 0.1926975 0.54098979
## 5 0.32874172 0.07952344 0.17836256 -0.14087134 0.1256798 0.04560219
## 6 -0.21921949 -0.27156456 -0.33843161 0.13085033 -0.3269674 -0.21852037
## MELK RUNDC1 DIAPH3.1 C16orf61 TGFB3 FGF18
## 1 -0.34581318 0.558363335 -0.44455761 0.05912505 0.08180754 -0.04819787
## 2 -0.01081727 -0.358850367 -0.24259102 -0.05018147 0.11869773 0.27382112
## 3 -0.13659874 0.129923754 -0.05644101 -0.27369996 0.11315389 0.03470079
## 4 -0.33968909 -0.071808178 -0.44559038 0.13548833 -0.02046981 0.10391225
## 5 -0.24841783 -0.007654665 -0.09683179 0.20482626 0.12745233 0.18982466
## 6 -0.13351906 -0.495218907 0.20965383 -0.17109518 0.26659311 -0.19909908
## CDC42BPA DTL WISP1 DIAPH3.2 OXCT1 ZNF533
## 1 0.19203352 -1.0115741 -0.004976858 -0.297776741 0.03135030 0.86482037
## 2 0.12535125 -0.1460709 0.217921322 0.020572007 0.16334775 0.01575178
## 3 -0.22807824 0.4482309 0.071255809 -0.144136303 0.05694880 -0.14760060
## 4 0.03281893 -1.0765161 -0.440424509 0.051227778 -0.20543872 -0.20651322
## 5 -0.07390664 -0.8427116 0.119424302 0.008235576 -0.13898008 0.28849255
## 6 -0.26596006 0.7104907 -0.166179324 0.121529650 0.08704478 -0.15575775
## RFC4 KNTC2 FBXO31 GSTM3_s RP5.860F19.3_s BBC3_s
## 1 0.06185603 0.59748058 -0.04140661 0.7138813 -0.57440211 1.26618208
## 2 0.01687964 -0.32724674 -0.13521580 -0.2060675 -0.18510052 0.04967577
## 3 -0.05427720 0.09654722 0.03522958 -1.3548121 -0.02412778 0.60577550
## 4 0.39446170 0.04598343 0.06070769 1.6818192 1.64900082 1.32486112
## 5 0.06241235 -0.09261277 0.26401621 0.4615079 0.31654599 -0.88140315
## 6 -0.19665862 -0.18365899 -0.01361086 -1.1176365 -0.19112999 1.10433590
## MMP9_s Contig35251_RC_s Contig40831_RC_s ALDH4A1_s SERF1A_s
## 1 -0.81209763 -0.6159014 -0.67108894 0.8726701 -0.44912514
## 2 1.33518865 -0.6455369 -0.04954233 0.5661609 0.66594479
## 3 0.05343442 -0.2035604 -0.48105268 0.4256655 0.40792311
## 4 -1.45942582 -0.8618013 -0.73333895 2.8307454 -3.02441939
## 5 -0.67912202 -0.9019223 0.44051430 0.8412396 -0.06331218
## 6 1.24281408 1.8323579 0.83454368 -0.3725348 -0.88610863
## SCUBE2_s MTDH_s DCK_s FLT1_s PECI.1_s QSCN6L1_s
## 1 0.1942805 -0.1821611 -0.05112308 -0.35139159 -0.3325357 -1.0997776
## 2 0.7808869 2.0740241 -0.93365927 1.07514078 -0.3880154 1.8335965
## 3 -0.6511435 0.5676351 1.66112734 0.42805919 -0.4896928 0.5567539
## 4 0.7058869 -1.7499914 -0.73518462 -0.18934069 0.8743696 -0.7856385
## 5 -0.7683671 -1.5446764 0.44718207 0.40601389 -0.9381866 -0.7791558
## 6 0.1784203 0.3517534 -0.80584276 -0.08541711 -1.6212538 1.7658451
## DIAPH3_s SLC2A3_s GPR180_s RTN4RL1_s Contig32125_RC_s STK32B_s
## 1 0.5392279 2.2788093 -0.24330929 -0.4879262 -0.01428243 0.4260294
## 2 0.7979150 2.8891253 -1.34271285 0.9697711 -0.01424700 1.0552938
## 3 1.4397330 -0.5675343 0.06414844 0.6558194 -0.43623247 -0.1283947
## 4 -0.9789797 -1.4779272 1.00150889 -2.4212151 -0.08061514 0.3343515
## 5 0.6558937 -0.3739394 -0.75914981 0.2793431 -0.59982477 -0.4946913
## 6 -0.1441357 -0.4071109 1.01694783 1.7164702 -0.98515784 -0.4243804
## EXT1_s COL4A2_s PECI_s GNAZ_s AYTL2_s Contig63649_RC_s
## 1 -0.4813926 0.1461046 0.9860751 0.3995737 1.5729248 0.28173399
## 2 -0.8379918 -0.8211227 1.1664517 -0.2763188 0.2428707 0.09793693
## 3 -0.3584438 -0.7306964 0.3724338 0.3498723 -0.6089022 0.43194361
## 4 0.4308618 0.7977604 2.4182648 2.1489730 3.3234779 2.19553759
## 5 -1.5449311 -0.9738091 0.6445516 -1.0370811 0.8461575 0.26067713
## 6 -0.4342479 -1.4612293 -0.3176484 -1.4228244 -0.1554245 -2.12748792
## RAB6B_s AA555029_RC_s GPR126_s ECT2_s NUSAP1_s GMPS_s
## 1 2.3501669 -0.1449979 -0.5035802 0.3578867 0.4808374 1.0797140
## 2 0.1756376 0.9631957 1.7873898 0.0514164 0.1499810 0.9544301
## 3 -0.6153129 0.7223765 0.4414985 -0.5499677 -0.1932127 0.3911430
## 4 1.6825094 -1.5482689 1.2467884 0.3492407 2.8075265 1.1536463
## 5 -0.3442524 -1.0642351 -0.3975453 1.5863570 0.3516236 0.9258458
## 6 -1.2666197 0.3898614 -0.9210265 -0.7087750 -1.1459402 -1.0776023
## UCHL5_s ORC6L_s TSPYL5_s MELK_s RUNDC1_s DIAPH3.1_s
## 1 -0.08519916 0.8392967 0.7914942 -1.2786512 2.1740843 -1.455677597
## 2 -1.50870499 0.5735413 -0.1130234 0.1658503 -0.8279555 -0.703133863
## 3 -0.11653751 -0.3680828 0.5176677 -0.3765191 0.7718018 -0.009523967
## 4 -1.02863177 0.9136225 2.4137021 -1.2522442 0.1115334 -1.459525754
## 5 -0.71393620 0.6630564 0.3268789 -0.8586827 0.3215078 -0.160023276
## 6 0.94830393 -1.0293010 -0.7857389 -0.3632395 -1.2742896 0.981966756
## C16orf61_s TGFB3_s FGF18_s CDC42BPA_s DTL_s WISP1_s
## 1 0.61186055 0.5877739 -0.1281211 1.2597867 -1.5254135 -0.1128211
## 2 0.04624479 0.8456102 1.5191381 0.8751973 0.1207287 1.2764815
## 3 -1.11037043 0.8068628 0.2959395 -1.1632042 1.2510601 0.3623297
## 4 1.00700876 -0.1270723 0.6499845 0.3415180 -1.6489300 -2.8269239
## 5 1.36580373 0.9067987 1.0894618 -0.2740209 -1.2042460 0.6625592
## 6 -0.57943337 1.8792943 -0.9000427 -1.3816873 1.7498645 -1.1175804
## DIAPH3.2_s OXCT1_s ZNF533_s RFC4_s KNTC2_s FBXO31_s GSTM3_l
## 1 -1.82122539 0.0835812 3.2029626 0.34098731 3.1695391 -0.09573504 1.146283
## 2 0.13180112 0.8077063 0.2600142 0.05613357 -1.4582792 -0.65490852 1.086730
## 3 -0.87866209 0.2240121 -0.3061799 -0.39453158 0.6626070 0.36107443 1.007013
## 4 0.31987013 -1.2154201 -0.5103764 2.44751394 0.4095594 0.51294316 1.205335
## 5 0.05611878 -0.8508345 1.2053583 0.34451067 -0.2840483 1.72481525 1.130296
## 6 0.75116261 0.3891157 -0.3344533 -1.29629087 -0.7396911 0.06994858 1.024000
## RP5.860F19.3_l BBC3_l MMP9_l Contig35251_RC_l Contig40831_RC_l
## 1 1.065387 1.1964092 1.0273483 0.9086397 1.059473
## 2 1.091576 1.0659392 1.1760282 0.9034401 1.097489
## 3 1.102207 1.1276993 1.0899612 0.9783141 1.071251
## 4 1.206491 1.2022920 0.9778165 0.8646544 1.055585
## 5 1.124341 0.9531455 1.0372264 0.8572905 1.126475
## 6 1.091175 1.1800021 1.1700672 1.2652235 1.149187
## ALDH4A1_l SERF1A_l SCUBE2_l MTDH_l DCK_l FLT1_l PECI.1_l QSCN6L1_l
## 1 1.152706 1.0702849 1.100629 1.0516011 0.9800153 1.079796 1.0689185 1.056186
## 2 1.130903 1.1335764 1.130618 1.2511141 0.8837522 1.153416 1.0658115 1.183440
## 3 1.120748 1.1192832 1.055762 1.1224146 1.1438818 1.120692 1.0600923 1.130027
## 4 1.281779 0.9067077 1.126833 0.8850266 0.9062228 1.088437 1.1342227 1.070614
## 5 1.150492 1.0926398 1.049379 0.9084851 1.0305361 1.119558 1.0344670 1.070910
## 6 1.061012 1.0443464 1.099806 1.1025348 0.8982809 1.093940 0.9941321 1.180676
## DIAPH3_l SLC2A3_l GPR180_l RTN4RL1_l Contig32125_RC_l STK32B_l EXT1_l
## 1 1.125205 1.214490 1.083427 1.0427923 1.094180 1.107831 1.0527575
## 2 1.139388 1.242493 1.034060 1.1631687 1.094182 1.140950 1.0311917
## 3 1.173734 1.072412 1.096808 1.1384390 1.072006 1.077713 1.0600865
## 4 1.037644 1.022350 1.136531 0.8568754 1.090727 1.102913 1.1058993
## 5 1.131626 1.082742 1.060567 1.1079547 1.063275 1.057306 0.9870127
## 6 1.086738 1.080980 1.137172 1.2196394 1.042403 1.061255 1.0555742
## COL4A2_l PECI_l GNAZ_l AYTL2_l Contig63649_RC_l RAB6B_l AA555029_RC_l
## 1 1.0885944 1.154833 1.121831 1.175354 1.1084369 1.241677 1.0824551
## 2 1.0200246 1.166909 1.087995 1.103819 1.1000074 1.104782 1.1503398
## 3 1.0266382 1.112621 1.119382 1.055165 1.1152736 1.049943 1.1359748
## 4 1.1322759 1.246926 1.204436 1.262321 1.1922303 1.201603 0.9893029
## 5 1.0087572 1.131560 1.048489 1.136901 1.1074748 1.069077 1.0224237
## 6 0.9719145 1.062921 1.027844 1.081362 0.9918692 1.002412 1.1157947
## GPR126_l ECT2_l NUSAP1_l GMPS_l UCHL5_l ORC6L_l TSPYL5_l MELK_l
## 1 1.064635 1.110358 1.134564 1.1687766 1.085833 1.1546201 1.149273 0.9761383
## 2 1.188195 1.085958 1.109305 1.1586833 1.004012 1.1319625 1.078809 1.0950000
## 3 1.117469 1.036274 1.082413 1.1120005 1.084102 1.0472789 1.128460 1.0520102
## 4 1.160385 1.109678 1.296174 1.1746853 1.032361 1.1608662 1.264406 0.9784430
## 5 1.070704 1.202594 1.124775 1.1563661 1.050517 1.1396518 1.113699 1.0121761
## 6 1.040379 1.022732 1.003728 0.9789156 1.141305 0.9832136 1.022983 1.0530851
## RUNDC1_l DIAPH3.1_l C16orf61_l TGFB3_l FGF18_l CDC42BPA_l DTL_l
## 1 1.2693007 0.9382254 1.118129 1.125516 1.082416 1.160658 0.6873433
## 2 0.9712143 1.0142915 1.081744 1.137416 1.185958 1.139547 1.0486967
## 3 1.1410086 1.0796194 1.002945 1.135636 1.110113 1.019541 1.2378613
## 4 1.0743851 0.9378211 1.142785 1.091766 1.132663 1.109493 0.6541381
## 5 1.0960575 1.0658026 1.164658 1.140219 1.159966 1.073668 0.7688520
## 6 0.9182013 1.1661631 1.039890 1.183748 1.029941 1.005780 1.3111641
## WISP1_l DIAPH3.2_l OXCT1_l ZNF533_l RFC4_l KNTC2_l FBXO31_l
## 1 1.0969520 0.9940749 1.109008 1.351915 1.119021 1.2802338 1.084714
## 2 1.1687356 1.1054462 1.151631 1.103849 1.104223 0.9831091 1.052493
## 3 1.1220865 1.0493743 1.117417 1.048161 1.080354 1.1302877 1.110287
## 4 0.9398414 1.1155441 1.027675 1.027291 1.222145 1.1138238 1.118646
## 5 1.1376485 1.1013537 1.051178 1.190429 1.119203 1.0672548 1.182958
## 6 1.0416259 1.1383232 1.127214 1.045297 1.030812 1.0354385 1.094065
# UNIVARIATE ANALYSIS of the first 6 columns.
# attach()/detach() are not used here; columns are accessed with `$`.
# ---- INFECTION ----
# Categorical variable that was coded 0/1 and has been converted to a factor.
# Absolute frequencies
freq.cc <- table(viral34_c$infection)
freq.cc
##
## bacterial_infection viral_infection
## 69 71
# Relative frequencies: counts divided by the number of rows
relfreq.cc <- freq.cc / nrow(viral34_c)
relfreq.cc
##
## bacterial_infection viral_infection
## 0.4928571 0.5071429
# Relative frequencies (alternative method): prop.table() on the counts
relfreq.cc <- prop.table(table(viral34_c$infection))
relfreq.cc
##
## bacterial_infection viral_infection
## 0.4928571 0.5071429
# cbind() combines the two tables column-wise into one frequency table
freqtablecc <- cbind(freq.cc, relfreq.cc)
freqtablecc
## freq.cc relfreq.cc
## bacterial_infection 69 0.4928571
## viral_infection 71 0.5071429
# Print with 4 significant digits; options() is a global setting, so it also
# affects everything printed after this point.
options(digits = 4)
freqtablecc
## freq.cc relfreq.cc
## bacterial_infection 69 0.4929
## viral_infection 71 0.5071
# Bar chart of the absolute frequencies
barplot(freq.cc)
# Bar chart of the relative frequencies; labels are listed in factor-level order
barplot(
  relfreq.cc,
  xlab = "status",
  ylab = "relative frequency",
  names.arg = c("bacterial_infection", "viral_infection"),
  col = c("green", "red")
)
# Pie chart of the relative frequencies
pie(
  relfreq.cc,
  labels = c("bacterial_infection", "viral_infection"),
  col = c("green", "red")
)
# ---- AGE ----
# Age is a continuous, numerical variable.
# Summary statistics
# Mean (average)
mean(viral34_c$age)
## [1] 44.25
# Median
median(viral34_c$age)
## [1] 45
# Range (maximum minus minimum)
max(viral34_c$age) - min(viral34_c$age)
## [1] 27
# Variance
var(viral34_c$age)
## [1] 28.81
# Standard deviation
sd(viral34_c$age)
## [1] 5.367
# Coefficient of variation (in percent)
100 * sd(viral34_c$age) / mean(viral34_c$age)
## [1] 12.13
# Minimum, first, second and third quartiles, and maximum
quantile(viral34_c$age)
## 0% 25% 50% 75% 100%
## 26 41 45 49 53
# Interquartile range
IQR(viral34_c$age)
## [1] 8
# 35% and 63% quantiles
quantile(viral34_c$age, probs = c(0.35, 0.63))
## 35% 63%
## 42 47
# Histogram
hist(viral34_c$age)
# Boxplot; the plotted variable is age, so the y axis is labelled accordingly.
boxplot(viral34_c$age, ylab = "age", col = "blue")
# Recorded when this was run: Error in plot.new() : figure margins too large
# (this usually means the plotting device/pane was too small -- TODO confirm)
# Density function: the histogram is drawn first, then the kernel density
# estimate is drawn as its own figure.
hist(viral34_c$age)
# The result is stored under the name `density`, the same name as the stats
# function; calls of the form density(...) still resolve to the function.
density <- density(viral34_c$age)
plot(density)
# Recorded when this was run: Error in plot.new() : figure margins too large
# Empirical cumulative distribution function
f <- ecdf(viral34_c$age)
plot(f)
# Recorded when this was run: Error in plot.new() : figure margins too large
# Testing for outliers in age (Grubbs test from the outliers package)
library(outliers)
##
## Attaching package: 'outliers'
## The following object is masked from 'package:randomForest':
##
## outlier
grubbs.test(viral34_c$age)
##
## Grubbs test for one outlier
##
## data: viral34_c$age
## G = 3.40, U = 0.92, p-value = 0.04
## alternative hypothesis: lowest value 26 is an outlier
# With p = 0.04 the lowest age (26) is flagged as an outlier at the 5% level.
# Testing for normal distribution of age (Shapiro-Wilk)
shapiro.test(viral34_c$age)
##
## Shapiro-Wilk normality test
##
## data: viral34_c$age
## W = 0.96, p-value = 3e-04
# Age is not normally distributed (p = 3e-04, i.e. p <= 0.05).
# HOSP
# Hospitalization is categorical data that originally held the numeric values
# 1 or 0 and was converted to a factor earlier
# (1 = hospitalization, 0 = no hospitalization).
# Absolute frequencies
freq.cc<-table(viral34_c$hosp)
freq.cc
##
## no_hospitalization hospitalization
## 73 67
# Relative frequencies
relfreq.cc<-freq.cc/nrow(viral34_c)
relfreq.cc
##
## no_hospitalization hospitalization
## 0.5214 0.4786
# Relative frequencies (alternative method)
relfreq.cc<-prop.table(table(viral34_c$hosp))
relfreq.cc
##
## no_hospitalization hospitalization
## 0.5214 0.4786
# cbind() combines the two tables column-wise; the column names come from the
# object names.
freqtablecc<-cbind(freq.cc, relfreq.cc)
freqtablecc
## freq.cc relfreq.cc
## no_hospitalization 73 0.5214
## hospitalization 67 0.4786
options(digits=4)
freqtablecc
## freq.cc relfreq.cc
## no_hospitalization 73 0.5214
## hospitalization 67 0.4786
barplot(freq.cc)
# Labels are taken from the table itself. The previous hard-coded labels were
# in the opposite order to the factor levels, so the no_hospitalization
# bar/slice (0.5214) was labelled "hospitalization" and vice versa.
barplot(relfreq.cc, xlab="status", ylab="relative frequency", names.arg=names(relfreq.cc), col=c("green", "red"))
pie(relfreq.cc, labels=names(relfreq.cc), col=c("green", "red"))
# stime
# Symptom time is a continuous numerical variable measured in days.
# Summary statistics
# Arithmetic mean
mean(viral34_c[["stime"]])
## [1] 7.356
# Median
median(viral34_c[["stime"]])
## [1] 6.962
# Range (maximum minus minimum)
diff(range(viral34_c[["stime"]]))
## [1] 17.6
# Variance
var(viral34_c[["stime"]])
## [1] 16.42
# Standard deviation
sd(viral34_c[["stime"]])
## [1] 4.052
# Coefficient of variation, expressed as a percentage
100 * sd(viral34_c[["stime"]]) / mean(viral34_c[["stime"]])
## [1] 55.08
# Minimum, first, second and third quartiles, and maximum
quantile(viral34_c[["stime"]], probs = seq(0, 1, by = 0.25))
## 0% 25% 50% 75% 100%
## 0.05476 4.69541 6.96235 10.05681 17.65914
# Interquartile range
IQR(viral34_c[["stime"]])
## [1] 5.361
# 35% and 63% quantiles
quantile(viral34_c[["stime"]], probs = c(0.35, 0.63))
## 35% 63%
## 5.566 8.432
# Histogram of symptom time
hist(viral34_c$stime)
# Boxplot of symptom time
boxplot(viral34_c$stime, ylab="stime", col="blue")
# NOTE: "Error in plot.new() : figure margins too large" was recorded at this
# point in an earlier run; presumably the graphics device was too small.
# Kernel density estimate of symptom time (histogram redrawn for comparison).
hist(viral34_c$stime)
# The result is stored under the name `density`, which shadows the function
# name; R still resolves density(...) calls to the function.
density<-density(viral34_c$stime)
plot(density)
# Empirical cumulative distribution function of symptom time
f<-ecdf(viral34_c$stime)
plot(f)
# Testing for outliers in stime (Grubbs' test):
grubbs.test(viral34_c$stime)
##
## Grubbs test for one outlier
##
## data: viral34_c$stime
## G = 2.54, U = 0.95, p-value = 0.7
## alternative hypothesis: highest value 17.65913758 is an outlier
# Interpretation: p = 0.7, so there is no evidence that the highest value is an
# outlier.
# Testing for normal distribution of stime:
shapiro.test(viral34_c$stime)
##
## Shapiro-Wilk normality test
##
## data: viral34_c$stime
## W = 0.98, p-value = 0.02
# Interpretation: p = 0.02 is below 0.05, so normality of stime is rejected at
# the 5% level.
# sind
# sind is a categorical indicator of symptom status that originally held the
# values 1 (symptoms finished) and 0 (symptoms remain) and was converted to a
# factor earlier.
# Absolute frequencies
freq.cc <- table(viral34_c[["sind"]])
freq.cc
##
## symptoms_remain symptoms_finished
## 93 47
# Relative frequencies: counts divided by the number of rows
relfreq.cc <- freq.cc / nrow(viral34_c)
relfreq.cc
##
## symptoms_remain symptoms_finished
## 0.6643 0.3357
# Relative frequencies again, this time via prop.table()
relfreq.cc <- prop.table(freq.cc)
relfreq.cc
##
## symptoms_remain symptoms_finished
## 0.6643 0.3357
# cbind() places the two tables side by side; its column headers come from the
# object names, so those names are kept as they are.
freqtablecc <- cbind(freq.cc, relfreq.cc)
freqtablecc
## freq.cc relfreq.cc
## symptoms_remain 93 0.6643
## symptoms_finished 47 0.3357
options(digits = 4)
freqtablecc
## freq.cc relfreq.cc
## symptoms_remain 93 0.6643
## symptoms_finished 47 0.3357
# Bar chart of counts, then bar chart and pie chart of proportions
barplot(freq.cc)
barplot(
  relfreq.cc,
  names.arg = c("symptoms_remain", "symptoms_finished"),
  col = c("green", "red"),
  xlab = "status",
  ylab = "relative frequency"
)
pie(
  relfreq.cc,
  labels = c("symptoms_remain", "symptoms_finished"),
  col = c("green", "red")
)
# gender
# gender is categorical data that originally held the values 1 or 0 and was
# converted earlier to a factor with levels "female" (base case) and "male".
# Absolute frequencies
freq.cc <- table(viral34_c[["gender"]])
freq.cc
##
## female male
## 62 78
# Relative frequencies: counts divided by the number of rows
relfreq.cc <- freq.cc / nrow(viral34_c)
relfreq.cc
##
## female male
## 0.4429 0.5571
# Relative frequencies again, this time via prop.table()
relfreq.cc <- prop.table(freq.cc)
relfreq.cc
##
## female male
## 0.4429 0.5571
# cbind() places the two tables side by side; its column headers come from the
# object names, so those names are kept as they are.
freqtablecc <- cbind(freq.cc, relfreq.cc)
freqtablecc
## freq.cc relfreq.cc
## female 62 0.4429
## male 78 0.5571
options(digits = 4)
freqtablecc
## freq.cc relfreq.cc
## female 62 0.4429
## male 78 0.5571
# Bar chart of counts, then bar chart and pie chart of proportions
barplot(freq.cc)
barplot(
  relfreq.cc,
  names.arg = c("female", "male"),
  col = c("green", "red"),
  xlab = "status",
  ylab = "relative frequency"
)
pie(
  relfreq.cc,
  labels = c("female", "male"),
  col = c("green", "red")
)
# BRIEF BIVARIATE ANALYSIS
# Continuous vs continuous: scatter plot of symptom time against age, with the
# least-squares regression line of stime on age drawn on top.
plot(viral34_c$age, viral34_c$stime)
abline(lm(stime ~ age, data = viral34_c))
# Continuous vs categorical: mean age per gender, then age boxplots by gender.
tapply(X = viral34_c$age, INDEX = viral34_c$gender, FUN = mean)
## female male
## 43.56 44.79
boxplot(viral34_c$age ~ viral34_c$gender)
QUESTION 2: Perform hierarchical clustering of (scaled) gene expression levels and explore possible relationships between genes. How many gene clusters are observed? Approach: hierarchical clustering of all genes (the matrix must be transposed so that genes are the rows) across all individuals, using Euclidean distance and the average linkage algorithm. Note: after standardization of gene expression levels, Euclidean and correlation distance are equivalent for clustering purposes, because the squared Euclidean distance between two standardized genes is proportional to 1 - r, where r is their correlation. Euclidean and Manhattan distance both measure absolute differences between vectors (gene expression levels), but Euclidean distance is less robust to outliers because it squares the differences.
# Check the class of the data set before clustering.
class(viral34_c)
## [1] "data.frame"
# List the column names and types. The listing below shows seven clinical
# columns first, then the raw gene columns, then their `_s` counterparts
# starting at column 58 (presumably the scaled values; confirm against the
# code that created them).
str(viral34_c)
## 'data.frame': 140 obs. of 157 variables:
## $ infection : Factor w/ 2 levels "bacterial_infection",..: 1 2 1 2 2 2 1 1 2 1 ...
## $ stime : num 7.3 6.72 7 9.33 3.44 ...
## $ sind : Factor w/ 2 levels "symptoms_remain",..: 1 1 2 1 2 1 1 2 2 1 ...
## $ gender : Factor w/ 2 levels "female","male": 1 2 2 1 2 1 2 2 1 1 ...
## $ hosp : Factor w/ 2 levels "no_hospitalization",..: 2 1 2 2 1 1 1 1 2 1 ...
## $ age : int 47 47 38 45 31 41 48 47 38 44 ...
## $ ancestry : chr "A" "A" "B" "A" ...
## $ GSTM3 : num 0.1465 -0.0354 -0.2626 0.3379 0.0966 ...
## $ RP5.860F19.3 : num -0.098 -0.021 0.0108 0.3417 0.0782 ...
## $ BBC3 : num 0.3082 -0.0964 0.0885 0.3277 -0.4061 ...
## $ MMP9 : num -0.2064 0.2415 -0.0258 -0.3414 -0.1786 ...
## $ Contig35251_RC : num -0.519 -0.532 -0.34 -0.626 -0.643 ...
## $ Contig40831_RC : num -0.11515 -0.00337 -0.08097 -0.12634 0.08476 ...
## $ ALDH4A1 : num 0.1667 0.0985 0.0671 0.603 0.1597 ...
## $ SERF1A : num -0.0838 0.1067 0.0627 -0.5238 -0.0179 ...
## $ SCUBE2 : num 0.00606 0.09757 -0.12583 0.08587 -0.14412 ...
## $ MTDH : num -0.1378 0.4942 0.0723 -0.577 -0.5194 ...
## $ DCK : num -0.336 -0.58 0.139 -0.525 -0.197 ...
## $ FLT1 : num -0.0559 0.169 0.067 -0.0304 0.0635 ...
## $ PECI.1 : num -0.0878 -0.0968 -0.1134 0.1088 -0.1864 ...
## $ QSCN6L1 : num -0.1246 0.2656 0.0957 -0.0828 -0.082 ...
## $ DIAPH3 : num 0.0808 0.1249 0.234 -0.1774 0.1007 ...
## $ SLC2A3 : num 0.3686 0.4642 -0.0776 -0.2203 -0.0472 ...
## $ GPR180 : num -0.04521 -0.18754 -0.00541 0.11594 -0.11199 ...
## $ RTN4RL1 : num -0.1629 0.2001 0.1219 -0.6442 0.0282 ...
## $ Contig32125_RC : num -0.0133 -0.0133 -0.0788 -0.0236 -0.1042 ...
## $ STK32B : num 0.0278 0.1297 -0.062 0.0129 -0.1214 ...
## $ EXT1 : num -0.1345 -0.1956 -0.1134 0.0219 -0.3168 ...
## $ COL4A2 : num -0.0299 -0.2267 -0.2083 0.1027 -0.2578 ...
## $ PECI : num 0.1735 0.212 0.0423 0.4796 0.1005 ...
## $ GNAZ : num 0.0705 -0.0317 0.063 0.3349 -0.1467 ...
## $ AYTL2 : num 0.2393 0.0157 -0.1276 0.5336 0.1171 ...
## $ Contig63649_RC : num 0.02962 0.00419 0.0504 0.29442 0.02671 ...
## $ RAB6B : num 0.4614 0.0186 -0.1425 0.3254 -0.0873 ...
## $ AA555029_RC : num -0.0481 0.1593 0.1142 -0.3106 -0.2201 ...
## $ GPR126 : num -0.1002 0.2812 0.0571 0.1912 -0.0826 ...
## $ ECT2 : num 0.0354 -0.0377 -0.1813 0.0334 0.3287 ...
## $ NUSAP1 : num 0.1098 0.0323 -0.0482 0.6553 0.0795 ...
## $ GMPS : num 0.2181 0.1857 0.0404 0.2371 0.1784 ...
## $ UCHL5 : num -0.0381 -0.2708 -0.0432 -0.1923 -0.1409 ...
## $ ORC6L : num 0.173 0.102 -0.15 0.193 0.126 ...
## $ TSPYL5 : num 0.1559 -0.0588 0.0909 0.541 0.0456 ...
## $ MELK : num -0.3458 -0.0108 -0.1366 -0.3397 -0.2484 ...
## $ RUNDC1 : num 0.55836 -0.35885 0.12992 -0.07181 -0.00765 ...
## $ DIAPH3.1 : num -0.4446 -0.2426 -0.0564 -0.4456 -0.0968 ...
## $ C16orf61 : num 0.0591 -0.0502 -0.2737 0.1355 0.2048 ...
## $ TGFB3 : num 0.0818 0.1187 0.1132 -0.0205 0.1275 ...
## $ FGF18 : num -0.0482 0.2738 0.0347 0.1039 0.1898 ...
## $ CDC42BPA : num 0.192 0.1254 -0.2281 0.0328 -0.0739 ...
## $ DTL : num -1.012 -0.146 0.448 -1.077 -0.843 ...
## $ WISP1 : num -0.00498 0.21792 0.07126 -0.44042 0.11942 ...
## $ DIAPH3.2 : num -0.29778 0.02057 -0.14414 0.05123 0.00824 ...
## $ OXCT1 : num 0.0314 0.1633 0.0569 -0.2054 -0.139 ...
## $ ZNF533 : num 0.8648 0.0158 -0.1476 -0.2065 0.2885 ...
## $ RFC4 : num 0.0619 0.0169 -0.0543 0.3945 0.0624 ...
## $ KNTC2 : num 0.5975 -0.3272 0.0965 0.046 -0.0926 ...
## $ FBXO31 : num -0.0414 -0.1352 0.0352 0.0607 0.264 ...
## $ GSTM3_s : num 0.714 -0.206 -1.355 1.682 0.462 ...
## $ RP5.860F19.3_s : num -0.5744 -0.1851 -0.0241 1.649 0.3165 ...
## $ BBC3_s : num 1.2662 0.0497 0.6058 1.3249 -0.8814 ...
## $ MMP9_s : num -0.8121 1.3352 0.0534 -1.4594 -0.6791 ...
## $ Contig35251_RC_s: num -0.616 -0.646 -0.204 -0.862 -0.902 ...
## $ Contig40831_RC_s: num -0.6711 -0.0495 -0.4811 -0.7333 0.4405 ...
## $ ALDH4A1_s : num 0.873 0.566 0.426 2.831 0.841 ...
## $ SERF1A_s : num -0.4491 0.6659 0.4079 -3.0244 -0.0633 ...
## $ SCUBE2_s : num 0.194 0.781 -0.651 0.706 -0.768 ...
## $ MTDH_s : num -0.182 2.074 0.568 -1.75 -1.545 ...
## $ DCK_s : num -0.0511 -0.9337 1.6611 -0.7352 0.4472 ...
## $ FLT1_s : num -0.351 1.075 0.428 -0.189 0.406 ...
## $ PECI.1_s : num -0.333 -0.388 -0.49 0.874 -0.938 ...
## $ QSCN6L1_s : num -1.1 1.834 0.557 -0.786 -0.779 ...
## $ DIAPH3_s : num 0.539 0.798 1.44 -0.979 0.656 ...
## $ SLC2A3_s : num 2.279 2.889 -0.568 -1.478 -0.374 ...
## $ GPR180_s : num -0.2433 -1.3427 0.0641 1.0015 -0.7591 ...
## $ RTN4RL1_s : num -0.488 0.97 0.656 -2.421 0.279 ...
## $ Contig32125_RC_s: num -0.0143 -0.0142 -0.4362 -0.0806 -0.5998 ...
## $ STK32B_s : num 0.426 1.055 -0.128 0.334 -0.495 ...
## $ EXT1_s : num -0.481 -0.838 -0.358 0.431 -1.545 ...
## $ COL4A2_s : num 0.146 -0.821 -0.731 0.798 -0.974 ...
## $ PECI_s : num 0.986 1.166 0.372 2.418 0.645 ...
## $ GNAZ_s : num 0.4 -0.276 0.35 2.149 -1.037 ...
## $ AYTL2_s : num 1.573 0.243 -0.609 3.323 0.846 ...
## $ Contig63649_RC_s: num 0.2817 0.0979 0.4319 2.1955 0.2607 ...
## $ RAB6B_s : num 2.35 0.176 -0.615 1.683 -0.344 ...
## $ AA555029_RC_s : num -0.145 0.963 0.722 -1.548 -1.064 ...
## $ GPR126_s : num -0.504 1.787 0.441 1.247 -0.398 ...
## $ ECT2_s : num 0.3579 0.0514 -0.55 0.3492 1.5864 ...
## $ NUSAP1_s : num 0.481 0.15 -0.193 2.808 0.352 ...
## $ GMPS_s : num 1.08 0.954 0.391 1.154 0.926 ...
## $ UCHL5_s : num -0.0852 -1.5087 -0.1165 -1.0286 -0.7139 ...
## $ ORC6L_s : num 0.839 0.574 -0.368 0.914 0.663 ...
## $ TSPYL5_s : num 0.791 -0.113 0.518 2.414 0.327 ...
## $ MELK_s : num -1.279 0.166 -0.377 -1.252 -0.859 ...
## $ RUNDC1_s : num 2.174 -0.828 0.772 0.112 0.322 ...
## $ DIAPH3.1_s : num -1.45568 -0.70313 -0.00952 -1.45953 -0.16002 ...
## $ C16orf61_s : num 0.6119 0.0462 -1.1104 1.007 1.3658 ...
## $ TGFB3_s : num 0.588 0.846 0.807 -0.127 0.907 ...
## $ FGF18_s : num -0.128 1.519 0.296 0.65 1.089 ...
## $ CDC42BPA_s : num 1.26 0.875 -1.163 0.342 -0.274 ...
## [list output truncated]
# Dimensions of the data set (rows = individuals, columns = variables)
dim(viral34_c)
## [1] 140 157
# Cluster the genes: columns 58:107 are transposed so that each gene becomes a
# row, then clustered using Euclidean distance with average linkage.
hc_genes1 <- hclust(
  d = dist(t(viral34_c[58:107]), method = "euclidean"),
  method = "average"
)
plot(hc_genes1)
# Same clustering of the genes, using Euclidean distance with ward.D2 linkage.
hc_genes2 <- hclust(
  d = dist(t(viral34_c[58:107]), method = "euclidean"),
  method = "ward.D2"
)
plot(hc_genes2)
# Redraw the ward.D2 dendrogram cut into five groups, with one colour per
# group, smaller labels (cex) and the grey ggplot2 theme.
fviz_dend(
  hc_genes2,
  k = 5,
  k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07", "green"),
  color_labels_by_k = TRUE,
  cex = 0.5,
  main = "Dendrogram - ward.D2",
  sub = "",
  xlab = "Objects",
  ylab = "Distance",
  ggtheme = theme_gray()
)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
QUESTION 3 Perform hierarchical clustering of INDIVIDUALS according to their (scaled) gene expression levels and explore possible relationships between them. How many clusters of individuals are observed? Check visually whether the clustering is related to infection, gender, hospitalization or ancestry.
# Cluster the individuals (rows 1:140) on gene columns 58:107, using Euclidean
# distance with average linkage.
hc_individuals1 <- hclust(
  d = dist((viral34_c[1:140, 58:107]), method = "euclidean"),
  method = "average"
)
plot(hc_individuals1)
# Same clustering of the individuals, using Euclidean distance with ward.D2
# linkage.
hc_individuals2 <- hclust(
  d = dist((viral34_c[1:140, 58:107]), method = "euclidean"),
  method = "ward.D2"
)
plot(hc_individuals2)
# Redraw the ward.D2 dendrogram cut into seven groups, with one colour per
# group, smaller labels (cex) and the grey ggplot2 theme.
fviz_dend(
  hc_individuals2,
  k = 7,
  k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07", "green", "orange", "purple"),
  color_labels_by_k = TRUE,
  cex = 0.5,
  main = "Dendrogram - ward.D2",
  sub = "",
  xlab = "Objects",
  ylab = "Distance",
  ggtheme = theme_gray()
)
# Relabel the leaves with each clinical variable in turn to check visually
# whether the clustering is related to infection, gender, hospitalization or
# ancestry.
# Earlier experiments, kept for reference:
# x<-viral34_c$infection
# x<-t(viral34_c$infection)
# SIMILAR TO LECTURE NOTE: x<-factor(golub.cl, labels=c("*","ALL"))
# plot(hc_individuals2, labels=x)
invisible(lapply(
  c("infection", "gender", "hosp", "ancestry"),
  function(label_col) plot(hc_individuals2, labels = viral34_c[[label_col]])
))
# Author's reading of the four plots: NOT RELATED in every case.
# Six clusters of individuals were counted, and no relationship was observed
# between the clusters and infection, gender, hospitalization or ancestry.
# NOTE(review): the dendrogram above is cut at k = 7 while this note counts six
# clusters; confirm which number is intended.
QUESTION 4 Perform K-means clustering with k=2 and test whether the clustering is associated to (a) the kind of infection and (b) the risk of hospitalization. Interpret the results.
# K-means clustering of the individuals (rows) on the 50 `_s` gene-expression
# columns (58:107). The data frame is used as it is, without transposing,
# because the individuals are the objects being clustered.
# kmeans() starts from randomly chosen centres, so the seed is fixed to make
# the cluster assignment and numbering reproducible between runs.
# NOTE: output recorded below came from an earlier unseeded run, so the
# cluster numbers (1 and 2) may appear swapped when this is re-run.
set.seed(123)
kdata<-(viral34_c[, 58:107])
# k = 2 clusters; nstart=10 tries ten random starts and keeps the best one.
kcluster<-kmeans(kdata, 2, nstart=10)
kcluster
## K-means clustering with 2 clusters of sizes 64, 76
##
## Cluster means:
## GSTM3_s RP5.860F19.3_s BBC3_s MMP9_s Contig35251_RC_s Contig40831_RC_s
## 1 0.5900 0.6123 0.3327 -0.4213 -0.3588 -0.2909
## 2 -0.4969 -0.5156 -0.2802 0.3547 0.3021 0.2450
## ALDH4A1_s SERF1A_s SCUBE2_s MTDH_s DCK_s FLT1_s PECI.1_s QSCN6L1_s
## 1 0.7237 -0.2024 0.2849 -0.3974 0.1583 -0.09694 0.4486 -0.4184
## 2 -0.6094 0.1704 -0.2399 0.3346 -0.1333 0.08164 -0.3778 0.3524
## DIAPH3_s SLC2A3_s GPR180_s RTN4RL1_s Contig32125_RC_s STK32B_s EXT1_s
## 1 -0.2461 -0.02405 0.10183 -0.6597 0.3900 0.1789 0.2275
## 2 0.2072 0.02026 -0.08575 0.5555 -0.3284 -0.1506 -0.1916
## COL4A2_s PECI_s GNAZ_s AYTL2_s Contig63649_RC_s RAB6B_s AA555029_RC_s
## 1 0.4872 0.4333 0.4928 0.6521 0.5203 0.5185 -0.5422
## 2 -0.4103 -0.3649 -0.4150 -0.5491 -0.4382 -0.4366 0.4566
## GPR126_s ECT2_s NUSAP1_s GMPS_s UCHL5_s ORC6L_s TSPYL5_s MELK_s RUNDC1_s
## 1 -0.04061 0.2304 0.6191 0.7942 -0.4506 0.6737 0.6112 0.08017 0.3151
## 2 0.03420 -0.1940 -0.5214 -0.6688 0.3794 -0.5673 -0.5147 -0.06751 -0.2653
## DIAPH3.1_s C16orf61_s TGFB3_s FGF18_s CDC42BPA_s DTL_s WISP1_s DIAPH3.2_s
## 1 -0.6318 0.6253 0.2035 0.5933 0.6742 -0.5649 -0.1367 -0.4026
## 2 0.5321 -0.5266 -0.1713 -0.4996 -0.5677 0.4757 0.1151 0.3390
## OXCT1_s ZNF533_s RFC4_s KNTC2_s FBXO31_s
## 1 -0.3678 0.3251 0.6447 0.4141 0.005484
## 2 0.3097 -0.2738 -0.5429 -0.3487 -0.004618
##
## Clustering vector:
## [1] 1 2 2 1 1 2 2 2 1 1 1 1 2 2 1 2 2 2 1 1 1 1 2 1 1 2 1 2 1 2 1 1 2 2 2 1 2
## [38] 1 2 2 2 2 2 1 2 1 2 2 1 2 1 2 1 2 2 2 2 2 2 2 1 2 1 1 1 2 2 2 1 2 2 2 2 1
## [75] 1 2 1 2 2 1 1 2 2 2 1 2 1 1 2 1 1 1 2 2 1 2 1 1 2 1 2 1 1 2 1 2 1 1 2 2 2
## [112] 2 1 1 1 2 2 2 2 1 2 2 2 1 2 1 1 1 2 1 1 1 2 1 1 2 1 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 3112 2626
## (between_SS / total_SS = 17.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Results (taken from the recorded k-means output above):
# Within cluster sum of squares by cluster: 3112 2626
# (between_SS / total_SS = 17.4 %)
# K-means produced 2 clusters of sizes 64 and 76.
# Reuse the fitted assignment. Calling kmeans() again would draw new random
# starting centres and could return a different or relabelled partition.
kcluster$cluster
## [1] 1 2 2 1 1 2 2 2 1 1 1 1 2 2 1 2 2 2 1 1 1 1 2 1 1 2 1 2 1 2 1 1 2 2 2 1 2
## [38] 1 2 2 2 2 2 1 2 1 2 2 1 2 1 2 1 2 2 2 2 2 2 2 1 2 1 1 1 2 2 2 1 2 2 2 2 1
## [75] 1 2 1 2 2 1 1 2 2 2 1 2 1 1 2 1 1 1 2 2 1 2 1 1 2 1 2 1 1 2 1 2 1 1 2 2 2
## [112] 2 1 1 1 2 2 2 2 1 2 2 2 1 2 1 1 1 2 1 1 1 2 1 1 2 1 2 2 2
# (a) Is cluster membership associated with the kind of infection?
# Cross-tabulate the two classifications and apply a chi-squared test of
# independence.
cluster_by_infection <- table(cluster = kcluster$cluster, infection = viral34_c$infection)
cluster_by_infection
chisq.test(cluster_by_infection)
# (b) Is cluster membership associated with hospitalization?
cluster_by_hosp <- table(cluster = kcluster$cluster, hosp = viral34_c$hosp)
cluster_by_hosp
chisq.test(cluster_by_hosp)
# Interpretation: a p-value below 0.05 rejects independence, meaning the
# clustering is associated with that variable; otherwise there is no evidence
# of association.
# summary() only reports the marginal counts of each categorical variable.
# Similar group sizes do not show association, so it is kept for reference
# only.
summary(viral34_c)
## infection stime sind gender
## bacterial_infection:69 Min. : 0.055 symptoms_remain :93 female:62
## viral_infection :71 1st Qu.: 4.695 symptoms_finished:47 male :78
## Median : 6.962
## Mean : 7.356
## 3rd Qu.:10.057
## Max. :17.659
## hosp age ancestry GSTM3
## no_hospitalization:73 Min. :26.0 Length:140 Min. :-0.3594
## hospitalization :67 1st Qu.:41.0 Class :character 1st Qu.:-0.1455
## Median :45.0 Mode :character Median :-0.0203
## Mean :44.2 Mean : 0.0053
## 3rd Qu.:49.0 3rd Qu.: 0.1233
## Max. :53.0 Max. : 0.5561
## RP5.860F19.3 BBC3 MMP9 Contig35251_RC
## Min. :-0.4242 Min. :-1.0828 Min. :-0.4943 Min. :-0.9177
## 1st Qu.:-0.1072 1st Qu.:-0.3333 1st Qu.:-0.1605 1st Qu.:-0.5925
## Median : 0.0087 Median :-0.0953 Median :-0.0476 Median :-0.4027
## Mean : 0.0156 Mean :-0.1130 Mean :-0.0370 Mean :-0.2517
## 3rd Qu.: 0.1031 3rd Qu.: 0.1110 3rd Qu.: 0.0880 3rd Qu.: 0.0437
## Max. : 0.5938 Max. : 0.6018 Max. : 0.5168 Max. : 0.9944
## Contig40831_RC ALDH4A1 SERF1A SCUBE2
## Min. :-0.4715 Min. :-0.7679 Min. :-0.5563 Min. :-0.5152
## 1st Qu.:-0.1256 1st Qu.:-0.1749 1st Qu.:-0.0984 1st Qu.:-0.1291
## Median : 0.0270 Median :-0.0041 Median : 0.0049 Median :-0.0226
## Mean : 0.0055 Mean :-0.0277 Mean :-0.0070 Mean :-0.0243
## 3rd Qu.: 0.1225 3rd Qu.: 0.1378 3rd Qu.: 0.0900 3rd Qu.: 0.0749
## Max. : 0.4185 Max. : 0.6030 Max. : 0.3561 Max. : 0.4372
## MTDH DCK FLT1 PECI.1
## Min. :-0.6756 Min. :-0.909 Min. :-0.4826 Min. :-0.4336
## 1st Qu.:-0.2933 1st Qu.:-0.529 1st Qu.:-0.1008 1st Qu.:-0.1396
## Median :-0.0834 Median :-0.340 Median : 0.0189 Median :-0.0403
## Mean :-0.0867 Mean :-0.321 Mean :-0.0005 Mean :-0.0336
## 3rd Qu.: 0.0738 3rd Qu.:-0.160 3rd Qu.: 0.0897 3rd Qu.: 0.0588
## Max. : 0.6406 Max. : 0.599 Max. : 0.5083 Max. : 0.5128
## QSCN6L1 DIAPH3 SLC2A3 GPR180
## Min. :-0.3794 Min. :-0.4493 Min. :-0.3716 Min. :-0.3552
## 1st Qu.:-0.0466 1st Qu.:-0.1120 1st Qu.:-0.0777 1st Qu.:-0.0803
## Median : 0.0078 Median :-0.0058 Median : 0.0005 Median :-0.0206
## Mean : 0.0217 Mean :-0.0109 Mean : 0.0114 Mean :-0.0137
## 3rd Qu.: 0.0981 3rd Qu.: 0.0992 3rd Qu.: 0.0806 3rd Qu.: 0.0598
## Max. : 0.5401 Max. : 0.3549 Max. : 0.4642 Max. : 0.3306
## RTN4RL1 Contig32125_RC STK32B EXT1
## Min. :-0.6646 Min. :-0.5321 Min. :-0.4804 Min. :-0.4778
## 1st Qu.:-0.2055 1st Qu.:-0.1135 1st Qu.:-0.1429 1st Qu.:-0.1675
## Median : 0.0046 Median :-0.0090 Median :-0.0235 Median :-0.0558
## Mean :-0.0414 Mean :-0.0110 Mean :-0.0412 Mean :-0.0519
## 3rd Qu.: 0.1318 3rd Qu.: 0.0734 3rd Qu.: 0.0449 3rd Qu.: 0.0605
## Max. : 0.4281 Max. : 0.4563 Max. : 0.4580 Max. : 0.3741
## COL4A2 PECI GNAZ AYTL2
## Min. :-0.5987 Min. :-0.4423 Min. :-0.3175 Min. :-0.6943
## 1st Qu.:-0.1979 1st Qu.:-0.1942 1st Qu.:-0.0956 1st Qu.:-0.1319
## Median :-0.0528 Median :-0.0637 Median :-0.0164 Median :-0.0460
## Mean :-0.0596 Mean :-0.0373 Mean : 0.0101 Mean :-0.0252
## 3rd Qu.: 0.0627 3rd Qu.: 0.0966 3rd Qu.: 0.0834 3rd Qu.: 0.0654
## Max. : 0.5602 Max. : 0.6090 Max. : 0.4306 Max. : 0.5336
## Contig63649_RC RAB6B AA555029_RC GPR126
## Min. :-0.3654 Min. :-0.5692 Min. :-0.431 Min. :-0.3797
## 1st Qu.:-0.0984 1st Qu.:-0.1431 1st Qu.:-0.160 1st Qu.:-0.1361
## Median :-0.0249 Median :-0.0522 Median :-0.001 Median :-0.0105
## Mean :-0.0094 Mean :-0.0172 Mean :-0.021 Mean :-0.0164
## 3rd Qu.: 0.0900 3rd Qu.: 0.0896 3rd Qu.: 0.107 3rd Qu.: 0.0978
## Max. : 0.3205 Max. : 0.4946 Max. : 0.820 Max. : 0.4393
## ECT2 NUSAP1 GMPS UCHL5
## Min. :-0.5077 Min. :-0.5863 Min. :-0.5915 Min. :-0.4585
## 1st Qu.:-0.2311 1st Qu.:-0.1607 1st Qu.:-0.2841 1st Qu.:-0.1311
## Median :-0.0813 Median :-0.0093 Median :-0.0451 Median :-0.0386
## Mean :-0.0500 Mean :-0.0029 Mean :-0.0605 Mean :-0.0242
## 3rd Qu.: 0.0984 3rd Qu.: 0.1504 3rd Qu.: 0.1528 3rd Qu.: 0.0921
## Max. : 0.7757 Max. : 0.6765 Max. : 0.5519 Max. : 0.5607
## ORC6L TSPYL5 MELK RUNDC1
## Min. :-0.7968 Min. :-0.6789 Min. :-0.7898 Min. :-0.870
## 1st Qu.:-0.2140 1st Qu.:-0.1786 1st Qu.:-0.1895 1st Qu.:-0.331
## Median :-0.0244 Median :-0.0244 Median :-0.0611 Median :-0.118
## Mean :-0.0517 Mean :-0.0320 Mean :-0.0493 Mean :-0.106
## 3rd Qu.: 0.1501 3rd Qu.: 0.1313 3rd Qu.: 0.0744 3rd Qu.: 0.104
## Max. : 0.5067 Max. : 0.6178 Max. : 0.8189 Max. : 0.753
## DIAPH3.1 C16orf61 TGFB3 FGF18
## Min. :-0.7682 Min. :-0.6119 Min. :-0.4152 Min. :-0.5978
## 1st Qu.:-0.2564 1st Qu.:-0.1889 1st Qu.:-0.0924 1st Qu.:-0.1404
## Median :-0.0683 Median :-0.0931 Median :-0.0053 Median : 0.0015
## Mean :-0.0539 Mean :-0.0591 Mean :-0.0023 Mean :-0.0232
## 3rd Qu.: 0.1179 3rd Qu.: 0.0587 3rd Qu.: 0.0827 3rd Qu.: 0.1070
## Max. : 0.7049 Max. : 0.5941 Max. : 0.4397 Max. : 0.4822
## CDC42BPA DTL WISP1 DIAPH3.2
## Min. :-0.4444 Min. :-1.264 Min. :-0.4404 Min. :-0.4510
## 1st Qu.:-0.1519 1st Qu.:-0.651 1st Qu.:-0.0876 1st Qu.:-0.1221
## Median :-0.0436 Median :-0.153 Median : 0.0240 Median : 0.0088
## Mean :-0.0264 Mean :-0.209 Mean : 0.0131 Mean :-0.0009
## 3rd Qu.: 0.0804 3rd Qu.: 0.203 3rd Qu.: 0.1223 3rd Qu.: 0.1127
## Max. : 0.4842 Max. : 0.892 Max. : 0.3755 Max. : 0.3669
## OXCT1 ZNF533 RFC4 KNTC2
## Min. :-0.4278 Min. :-0.5109 Min. :-0.5636 Min. :-0.4311
## 1st Qu.:-0.0905 1st Qu.:-0.2613 1st Qu.:-0.0825 1st Qu.:-0.1841
## Median : 0.0095 Median :-0.1380 Median :-0.0010 Median :-0.0616
## Mean : 0.0161 Mean :-0.0593 Mean : 0.0080 Mean :-0.0359
## 3rd Qu.: 0.1234 3rd Qu.: 0.0381 3rd Qu.: 0.1045 3rd Qu.: 0.0722
## Max. : 0.6491 Max. : 0.8648 Max. : 0.4791 Max. : 0.5975
## FBXO31 GSTM3_s RP5.860F19.3_s BBC3_s
## Min. :-0.4215 Min. :-1.845 Min. :-2.2232 Min. :-2.9155
## 1st Qu.:-0.1388 1st Qu.:-0.763 1st Qu.:-0.6210 1st Qu.:-0.6625
## Median :-0.0451 Median :-0.130 Median :-0.0348 Median : 0.0531
## Mean :-0.0253 Mean : 0.000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.0860 3rd Qu.: 0.597 3rd Qu.: 0.4423 3rd Qu.: 0.6732
## Max. : 0.5556 Max. : 2.786 Max. : 2.9235 Max. : 2.1487
## MMP9_s Contig35251_RC_s Contig40831_RC_s ALDH4A1_s
## Min. :-2.1926 Min. :-1.534 Min. :-2.653 Min. :-3.322
## 1st Qu.:-0.5924 1st Qu.:-0.785 1st Qu.:-0.729 1st Qu.:-0.661
## Median :-0.0509 Median :-0.348 Median : 0.120 Median : 0.106
## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.5992 3rd Qu.: 0.680 3rd Qu.: 0.651 3rd Qu.: 0.743
## Max. : 2.6553 Max. : 2.870 Max. : 2.296 Max. : 2.831
## SERF1A_s SCUBE2_s MTDH_s DCK_s
## Min. :-3.214 Min. :-3.1470 Min. :-2.1023 Min. :-2.120
## 1st Qu.:-0.534 1st Qu.:-0.6724 1st Qu.:-0.7373 1st Qu.:-0.748
## Median : 0.070 Median : 0.0104 Median : 0.0118 Median :-0.066
## Mean : 0.000 Mean : 0.0000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.568 3rd Qu.: 0.6356 3rd Qu.: 0.5733 3rd Qu.: 0.584
## Max. : 2.125 Max. : 2.9577 Max. : 2.5964 Max. : 3.320
## FLT1_s PECI.1_s QSCN6L1_s DIAPH3_s
## Min. :-3.057 Min. :-2.456 Min. :-3.015 Min. :-2.5771
## 1st Qu.:-0.636 1st Qu.:-0.651 1st Qu.:-0.513 1st Qu.:-0.5946
## Median : 0.123 Median :-0.041 Median :-0.105 Median : 0.0302
## Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean : 0.0000
## 3rd Qu.: 0.572 3rd Qu.: 0.568 3rd Qu.: 0.575 3rd Qu.: 0.6471
## Max. : 3.227 Max. : 3.356 Max. : 3.897 Max. : 2.1500
## SLC2A3_s GPR180_s RTN4RL1_s Contig32125_RC_s
## Min. :-2.4430 Min. :-2.638 Min. :-2.503 Min. :-3.357
## 1st Qu.:-0.5685 1st Qu.:-0.515 1st Qu.:-0.659 1st Qu.:-0.660
## Median :-0.0693 Median :-0.053 Median : 0.185 Median : 0.013
## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.4415 3rd Qu.: 0.568 3rd Qu.: 0.696 3rd Qu.: 0.544
## Max. : 2.8891 Max. : 2.659 Max. : 1.886 Max. : 3.011
## STK32B_s EXT1_s COL4A2_s PECI_s
## Min. :-2.711 Min. :-2.4843 Min. :-2.6489 Min. :-1.895
## 1st Qu.:-0.627 1st Qu.:-0.6743 1st Qu.:-0.6795 1st Qu.:-0.734
## Median : 0.110 Median :-0.0225 Median : 0.0334 Median :-0.124
## Mean : 0.000 Mean : 0.0000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.531 3rd Qu.: 0.6559 3rd Qu.: 0.6012 3rd Qu.: 0.626
## Max. : 3.082 Max. : 2.4850 Max. : 3.0457 Max. : 3.023
## GNAZ_s AYTL2_s Contig63649_RC_s RAB6B_s
## Min. :-2.167 Min. :-3.980 Min. :-2.573 Min. :-2.710
## 1st Qu.:-0.700 1st Qu.:-0.635 1st Qu.:-0.643 1st Qu.:-0.618
## Median :-0.175 Median :-0.124 Median :-0.112 Median :-0.172
## Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.485 3rd Qu.: 0.539 3rd Qu.: 0.718 3rd Qu.: 0.524
## Max. : 2.782 Max. : 3.323 Max. : 2.384 Max. : 2.513
## AA555029_RC_s GPR126_s ECT2_s NUSAP1_s
## Min. :-2.190 Min. :-2.1825 Min. :-1.917 Min. :-2.4885
## 1st Qu.:-0.743 1st Qu.:-0.7189 1st Qu.:-0.759 1st Qu.:-0.6731
## Median : 0.106 Median : 0.0356 Median :-0.131 Median :-0.0273
## Mean : 0.000 Mean : 0.0000 Mean : 0.000 Mean : 0.0000
## 3rd Qu.: 0.687 3rd Qu.: 0.6862 3rd Qu.: 0.621 3rd Qu.: 0.6540
## Max. : 4.495 Max. : 2.7371 Max. : 3.458 Max. : 2.8981
## GMPS_s UCHL5_s ORC6L_s TSPYL5_s
## Min. :-2.0588 Min. :-2.657 Min. :-2.786 Min. :-2.7252
## 1st Qu.:-0.8669 1st Qu.:-0.654 1st Qu.:-0.607 1st Qu.:-0.6176
## Median : 0.0594 Median :-0.088 Median : 0.102 Median : 0.0318
## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.0000
## 3rd Qu.: 0.8269 3rd Qu.: 0.711 3rd Qu.: 0.754 3rd Qu.: 0.6877
## Max. : 2.3741 Max. : 3.578 Max. : 2.088 Max. : 2.7375
## MELK_s RUNDC1_s DIAPH3.1_s C16orf61_s
## Min. :-3.193 Min. :-2.5022 Min. :-2.6615 Min. :-2.860
## 1st Qu.:-0.604 1st Qu.:-0.7355 1st Qu.:-0.7545 1st Qu.:-0.672
## Median :-0.051 Median :-0.0409 Median :-0.0537 Median :-0.176
## Mean : 0.000 Mean : 0.0000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.533 3rd Qu.: 0.6859 3rd Qu.: 0.6400 3rd Qu.: 0.609
## Max. : 3.744 Max. : 2.8101 Max. : 2.8273 Max. : 3.380
## TGFB3_s FGF18_s CDC42BPA_s DTL_s
## Min. :-2.8862 Min. :-2.939 Min. :-2.4108 Min. :-2.006
## 1st Qu.:-0.6297 1st Qu.:-0.600 1st Qu.:-0.7237 1st Qu.:-0.839
## Median :-0.0212 Median : 0.126 Median :-0.0991 Median : 0.107
## Mean : 0.0000 Mean : 0.000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.5942 3rd Qu.: 0.666 3rd Qu.: 0.6162 3rd Qu.: 0.785
## Max. : 3.0889 Max. : 2.585 Max. : 2.9450 Max. : 2.095
## WISP1_s DIAPH3.2_s OXCT1_s ZNF533_s
## Min. :-2.8269 Min. :-2.7614 Min. :-2.435 Min. :-1.565
## 1st Qu.:-0.6278 1st Qu.:-0.7434 1st Qu.:-0.585 1st Qu.:-0.700
## Median : 0.0679 Median : 0.0598 Median :-0.036 Median :-0.273
## Mean : 0.0000 Mean : 0.0000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.6807 3rd Qu.: 0.6967 3rd Qu.: 0.588 3rd Qu.: 0.337
## Max. : 2.2588 Max. : 2.2564 Max. : 3.472 Max. : 3.203
## RFC4_s KNTC2_s FBXO31_s GSTM3_l
## Min. :-3.620 Min. :-1.978 Min. :-2.362 Min. :0.971
## 1st Qu.:-0.573 1st Qu.:-0.742 1st Qu.:-0.676 1st Qu.:1.049
## Median :-0.057 Median :-0.129 Median :-0.117 Median :1.092
## Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean :1.098
## 3rd Qu.: 0.611 3rd Qu.: 0.541 3rd Qu.: 0.664 3rd Qu.:1.139
## Max. : 2.983 Max. : 3.170 Max. : 3.463 Max. :1.269
## RP5.860F19.3_l BBC3_l MMP9_l Contig35251_RC_l
## Min. :0.946 Min. :0.651 Min. :0.919 Min. :0.734
## 1st Qu.:1.062 1st Qu.:0.981 1st Qu.:1.044 1st Qu.:0.879
## Median :1.101 Median :1.066 Median :1.083 Median :0.955
## Mean :1.102 Mean :1.053 Mean :1.084 Mean :0.999
## 3rd Qu.:1.132 3rd Qu.:1.135 3rd Qu.:1.127 3rd Qu.:1.113
## Max. :1.279 Max. :1.281 Max. :1.258 Max. :1.385
## Contig40831_RC_l ALDH4A1_l SERF1A_l SCUBE2_l
## Min. :0.928 Min. :0.803 Min. :0.893 Min. :0.91
## 1st Qu.:1.056 1st Qu.:1.038 1st Qu.:1.065 1st Qu.:1.05
## Median :1.108 Median :1.097 Median :1.100 Median :1.09
## Mean :1.099 Mean :1.087 Mean :1.095 Mean :1.09
## 3rd Qu.:1.139 3rd Qu.:1.143 3rd Qu.:1.128 3rd Qu.:1.12
## Max. :1.229 Max. :1.282 Max. :1.211 Max. :1.23
## MTDH_l DCK_l FLT1_l PECI.1_l
## Min. :0.843 Min. :0.738 Min. :0.923 Min. :0.942
## 1st Qu.:0.996 1st Qu.:0.905 1st Qu.:1.064 1st Qu.:1.051
## Median :1.070 Median :0.978 Median :1.105 Median :1.085
## Mean :1.065 Mean :0.980 Mean :1.097 Mean :1.086
## 3rd Qu.:1.123 3rd Qu.:1.044 3rd Qu.:1.128 3rd Qu.:1.118
## Max. :1.292 Max. :1.280 Max. :1.255 Max. :1.256
## QSCN6L1_l DIAPH3_l SLC2A3_l GPR180_l
## Min. :0.963 Min. :0.936 Min. :0.966 Min. :0.973
## 1st Qu.:1.083 1st Qu.:1.060 1st Qu.:1.072 1st Qu.:1.071
## Median :1.101 Median :1.097 Median :1.099 Median :1.092
## Mean :1.105 Mean :1.093 Mean :1.101 Mean :1.093
## 3rd Qu.:1.131 3rd Qu.:1.131 3rd Qu.:1.125 3rd Qu.:1.118
## Max. :1.264 Max. :1.210 Max. :1.242 Max. :1.203
## RTN4RL1_l Contig32125_RC_l STK32B_l EXT1_l
## Min. :0.848 Min. :0.903 Min. :0.924 Min. :0.925
## 1st Qu.:1.028 1st Qu.:1.060 1st Qu.:1.050 1st Qu.:1.041
## Median :1.100 Median :1.096 Median :1.091 Median :1.080
## Mean :1.081 Mean :1.094 Mean :1.083 Mean :1.079
## 3rd Qu.:1.142 3rd Qu.:1.123 3rd Qu.:1.113 3rd Qu.:1.119
## Max. :1.232 Max. :1.240 Max. :1.241 Max. :1.216
## COL4A2_l PECI_l GNAZ_l AYTL2_l
## Min. :0.876 Min. :0.939 Min. :0.987 Min. :0.835
## 1st Qu.:1.030 1st Qu.:1.032 1st Qu.:1.066 1st Qu.:1.054
## Median :1.081 Median :1.077 Median :1.093 Median :1.083
## Mean :1.076 Mean :1.084 Mean :1.101 Mean :1.089
## 3rd Qu.:1.119 3rd Qu.:1.130 3rd Qu.:1.126 3rd Qu.:1.120
## Max. :1.270 Max. :1.283 Max. :1.233 Max. :1.262
## Contig63649_RC_l RAB6B_l AA555029_RC_l GPR126_l
## Min. :0.969 Min. :0.888 Min. :0.944 Min. :0.963
## 1st Qu.:1.065 1st Qu.:1.050 1st Qu.:1.044 1st Qu.:1.052
## Median :1.090 Median :1.081 Median :1.098 Median :1.095
## Mean :1.094 Mean :1.091 Mean :1.090 Mean :1.092
## 3rd Qu.:1.128 3rd Qu.:1.128 3rd Qu.:1.134 3rd Qu.:1.131
## Max. :1.200 Max. :1.251 Max. :1.340 Max. :1.235
## ECT2_l NUSAP1_l GMPS_l UCHL5_l ORC6L_l
## Min. :0.913 Min. :0.881 Min. :0.879 Min. :0.933 Min. :0.79
## 1st Qu.:1.018 1st Qu.:1.044 1st Qu.:0.999 1st Qu.:1.054 1st Qu.:1.02
## Median :1.071 Median :1.095 Median :1.083 Median :1.086 Median :1.09
## Mean :1.079 Mean :1.095 Mean :1.074 Mean :1.089 Mean :1.08
## 3rd Qu.:1.131 3rd Qu.:1.147 3rd Qu.:1.148 3rd Qu.:1.129 3rd Qu.:1.15
## Max. :1.329 Max. :1.302 Max. :1.268 Max. :1.270 Max. :1.25
## TSPYL5_l MELK_l RUNDC1_l DIAPH3.1_l
## Min. :0.842 Min. :0.793 Min. :0.756 Min. :0.803
## 1st Qu.:1.037 1st Qu.:1.033 1st Qu.:0.982 1st Qu.:1.009
## Median :1.090 Median :1.078 Median :1.058 Median :1.076
## Mean :1.085 Mean :1.079 Mean :1.057 Mean :1.076
## 3rd Qu.:1.141 3rd Qu.:1.123 3rd Qu.:1.133 3rd Qu.:1.137
## Max. :1.286 Max. :1.340 Max. :1.323 Max. :1.310
## C16orf61_l TGFB3_l FGF18_l CDC42BPA_l DTL_l
## Min. :0.871 Min. :0.95 Min. :0.876 Min. :0.938 Min. :0.551
## 1st Qu.:1.034 1st Qu.:1.07 1st Qu.:1.051 1st Qu.:1.047 1st Qu.:0.854
## Median :1.067 Median :1.10 Median :1.099 Median :1.084 Median :1.046
## Mean :1.077 Mean :1.10 Mean :1.089 Mean :1.088 Mean :1.007
## 3rd Qu.:1.118 3rd Qu.:1.13 3rd Qu.:1.134 3rd Qu.:1.125 3rd Qu.:1.164
## Max. :1.279 Max. :1.24 Max. :1.248 Max. :1.248 Max. :1.359
## WISP1_l DIAPH3.2_l OXCT1_l ZNF533_l RFC4_l
## Min. :0.94 Min. :0.936 Min. :0.945 Min. :0.912 Min. :0.89
## 1st Qu.:1.07 1st Qu.:1.057 1st Qu.:1.068 1st Qu.:1.008 1st Qu.:1.07
## Median :1.11 Median :1.102 Median :1.102 Median :1.052 Median :1.10
## Mean :1.10 Mean :1.097 Mean :1.102 Mean :1.074 Mean :1.10
## 3rd Qu.:1.14 3rd Qu.:1.135 3rd Qu.:1.139 3rd Qu.:1.111 3rd Qu.:1.13
## Max. :1.22 Max. :1.214 Max. :1.294 Max. :1.352 Max. :1.25
## KNTC2_l FBXO31_l
## Min. :0.944 Min. :0.947
## 1st Qu.:1.035 1st Qu.:1.051
## Median :1.078 Median :1.083
## Mean :1.084 Mean :1.089
## 3rd Qu.:1.122 3rd Qu.:1.127
## Max. :1.280 Max. :1.268
#Based on summary results, categorical proportions are as follows:
#infection:
#bacterial_infection:69
#viral_infection :71
#sind
#symptoms_remain :93
#symptoms_finished:47
#gender
#female:62
#male :78
#no_hospitalization:73
#hospitalization :67
#Based on these results, it appears that clustering is more associated with sind (symptoms remain vs. finished) compared
#to type of infection or risk of hospitalization
#Clustering is graphically depicted as follows:
#plot(kdata, col=kcluster$cluster)
#points(kcluster$centers, col=1:2, pch=8, cex=2)
#Error in `plot.new()`:
#! figure margins too large
#Clustering individuals according to the kind of infection (need to factor)
#k_infection<-as.factor(viral34_c$infection)
#kcluster_infection<-kmeans(k_infection, 2, nstart=10)
#kcluster_infection
#Clustering individuals according to the risk of hospitalization (need to factor)
#k_hosp<-as.factor(viral34_c$hosp)
#kcluster_hosp<-kmeans(k_hosp, 2, nstart=10)
#kcluster_hosp
#If the factored columns for infection and hospitalization are passed to kmeans() with 2 clusters,
# the following error message is generated: Error in kmeans(kdata, 2, nstart = 10) : more cluster centers than distinct data points.
#In addition: Warning message:In storage.mode(x) <- "double" : NAs introduced by coercion
#This occurs because kmeans clusters around the mean of the data points, and our data consist of text labels / factor levels
#(i.e. not numbers). Either the 2 raw unfactored (0/1) columns can be used, or the categorical data can be preprocessed in various ways,
#e.g. with the "one-hot encoding" method, which transforms the 2 two-level categorical columns into 4 columns that each indicate whether the sample
#belongs to the relevant category (similarly, a column with 3 ancestries would give 3 new binary (1 or 0) columns).
# Other methods include the ROCK algorithm (kaggle notebook) and "k-modes", which is similar to k-means for categories and is implemented in R.
#https://www.kaggle.com/code/vijjikiran/clustering-of-categorical-data/report
# k-means is the classical unsupervised clustering algorithm for numerical data. But computing the euclidean distance and the means in k-means algorithm doesn’t fare well with categorical data.
#So instead, I will therefore run categorical data through the following algorithms for clustering -
# (1)By applying one-hot encoding, the data will be converted to numeric data and then it will be run thru k-means.
# (2)The data will be run through k-modes algorithm that uses modes of categorical attributes instead of the means in k-means for clustering of categorical data.
# (3)The data will be run through the Rock(Robust clustering using links) algorithm that is designed for categorical variables - actually the categorical data is converted to booleans in this approach.
# I will evaluate how the purity of the clusters, a simple evaluative measure, is different for the each of the algorithms.
#Purity of clustering is a simple measure of the accuracy, which is between 0 and 1. 0 indicates poor clustering, and 1 indicates perfect clustering.
#Demonstrating 2 alternative methods:
#Method#1: Extract 2 unfactored categorical columns (infection and hospitalization) from original viral34 dataframe into new dataframe used for 2 kmeans
# Keep only columns 1 and 5 of the original (unfactored) viral34 data frame;
# these are the 0/1-coded infection and hosp variables.
kdata1 <- viral34[, c(1, 5)]
head(kdata1)
## infection hosp
## 1 0 1
## 2 1 0
## 3 0 1
## 4 1 1
## 5 1 0
## 6 1 0
# kmeans() chooses its starting centres at random, so without a fixed seed the
# cluster numbering (and the order of the reported sums of squares) can swap
# from one run to the next. Seed the RNG to make the result reproducible.
set.seed(1234)
# k = 2 clusters, best of 10 random starts
kcluster1 <- kmeans(kdata1, centers = 2, nstart = 10)
kcluster1
## K-means clustering with 2 clusters of sizes 69, 71
##
## Cluster means:
## infection hosp
## 1 0 0.3768
## 2 1 0.5775
##
## Clustering vector:
## [1] 1 2 1 2 2 2 1 1 2 1 2 2 2 1 1 1 2 1 1 2 1 2 1 2 1 1 1 1 1 1 1 1 1 2 2 2 2
## [38] 2 2 1 2 1 2 2 2 2 2 1 2 2 1 2 1 1 2 1 1 1 1 2 2 2 2 2 1 2 2 1 1 2 1 2 2 2
## [75] 2 1 1 2 2 1 1 1 2 2 1 2 2 2 2 1 1 2 1 1 2 2 2 1 1 2 1 1 2 1 2 2 2 1 1 2 1
## [112] 2 1 2 2 1 2 1 1 2 2 2 1 2 1 1 1 1 2 1 1 2 2 1 2 1 1 1 1 2
##
## Within cluster sum of squares by cluster:
## [1] 16.20 17.32
## (between_SS / total_SS = 52.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
#Results: Within cluster sum of squares by cluster:
# [1] 17.32394 16.20290
# (between_SS / total_SS = 52.1 %)
# Scatter plot of the two 0/1 variables, each point coloured by its cluster
plot(kdata1, col = kcluster1$cluster)
# Overlay the two cluster centres as large star symbols (pch 8)
points(kcluster1$centers, pch = 8, cex = 2, col = 1:2)
#YES! The clustering is visually related to infection risk and hospitalization status#############
#Method#2: One-hot encoding
library(caret)
# Build the dummy-variable (one-hot) encoding of viral34_c
dmy <- dummyVars(" ~ .", data = viral34_c)
trsf <- data.frame(predict(dmy, newdata = viral34_c))
# Pick the four indicator columns for infection and hosp by name rather than
# by position, so the selection does not silently change if the column order
# of viral34_c (and therefore of trsf) changes.
kdata2 <- trsf[, c(
  "infection.bacterial_infection", "infection.viral_infection",
  "hosp.no_hospitalization", "hosp.hospitalization"
)]
# Show what the data frame looks like
head(kdata2)
## infection.bacterial_infection infection.viral_infection
## 1 1 0
## 2 0 1
## 3 1 0
## 4 0 1
## 5 0 1
## 6 0 1
## hosp.no_hospitalization hosp.hospitalization
## 1 0 1
## 2 1 0
## 3 0 1
## 4 0 1
## 5 1 0
## 6 1 0
# The 2 categorical variables were expanded into 4 indicator columns, so
# k-means is run with k = 4.
# kmeans() starts from random centres; fix the seed so that the cluster
# numbering and the reported cluster sizes are reproducible between runs.
set.seed(1234)
kcluster2 <- kmeans(kdata2, centers = 4, nstart = 10)
kcluster2
## K-means clustering with 4 clusters of sizes 43, 26, 41, 30
##
## Cluster means:
## infection.bacterial_infection infection.viral_infection
## 1 1 0
## 2 1 0
## 3 0 1
## 4 0 1
## hosp.no_hospitalization hosp.hospitalization
## 1 1 0
## 2 0 1
## 3 0 1
## 4 1 0
##
## Clustering vector:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 2 4 2 3 4 4 1 1 3 1 4 4 3 1 2 1 3 1 2 4
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 1 4 1 4 2 2 2 1 2 1 2 1 1 4 3 3 3 3 4 1
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 4 1 3 3 3 4 3 1 4 4 2 4 2 2 3 1 1 1 2 3
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 4 3 3 3 1 3 4 1 2 3 1 3 4 3 3 1 1 4 4 1
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 1 1 4 3 2 3 3 4 4 2 1 3 1 1 3 3 3 1 2 3
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 1 2 4 1 3 3 4 1 1 4 1 3 1 3 3 2 4 1 1 3
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 3 3 2 4 1 2 2 2 3 1 1 3 4 2 3 1 2 2 1 4
##
## Within cluster sum of squares by cluster:
## [1] 0 0 0 0
## (between_SS / total_SS = 100.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
#Results: K-means clustering with 4 clusters of sizes 43, 30, 41, 26
#I will later demonstrate two additional clustering approaches using the following libraries tools:
# klaR: used for k-modes clustering
library("klaR")
# cba: used for ROCK clustering
library("cba")
# QUESTION 5 Perform PCA for exploring possible relationships BETWEEN INDIVIDUALS according to their (scaled) GENE EXPRESSION LEVELS. Provide the variance explained plot. How much variability is explained by the first two principal components? What is the eigenvalue of PC1 and how can it be interpreted? Check, using concentration ellipses, whether PCA projections of individuals are associated with infection, gender, hospitalization or ancestry. Which are the 10 genes that contribute most to PC1 and PC2? (follow similar steps as in section 1.5.8 in “Solutions Exercises section 2”). Discuss the results.
# Input for the PCA: the 50 columns 58:107 of viral34_c
# (assumed to be the scaled gene-expression columns -- TODO confirm).
X <- viral34_c[, 58:107]
# prcomp()'s argument is `scale.` (with a trailing dot); writing `scale`
# only works through partial argument matching, so spell it out in full.
pcaX <- prcomp(X, scale. = TRUE)
# Standard deviation, proportion and cumulative proportion of variance per PC
summary(pcaX)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Standard deviation 3.549 2.0941 1.6807 1.6212 1.5128 1.4407 1.3199 1.2180
## Proportion of Variance 0.252 0.0877 0.0565 0.0526 0.0458 0.0415 0.0348 0.0297
## Cumulative Proportion 0.252 0.3397 0.3962 0.4487 0.4945 0.5360 0.5709 0.6005
## PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
## Standard deviation 1.1524 1.0973 1.0868 1.0422 1.0277 0.9919 0.9643 0.9246
## Proportion of Variance 0.0266 0.0241 0.0236 0.0217 0.0211 0.0197 0.0186 0.0171
## Cumulative Proportion 0.6271 0.6512 0.6748 0.6965 0.7177 0.7373 0.7559 0.7730
## PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24
## Standard deviation 0.8979 0.8855 0.8715 0.8598 0.8316 0.7959 0.7584 0.7216
## Proportion of Variance 0.0161 0.0157 0.0152 0.0148 0.0138 0.0127 0.0115 0.0104
## Cumulative Proportion 0.7892 0.8048 0.8200 0.8348 0.8486 0.8613 0.8728 0.8832
## PC25 PC26 PC27 PC28 PC29 PC30 PC31
## Standard deviation 0.70540 0.69312 0.65886 0.62757 0.60388 0.57832 0.57115
## Proportion of Variance 0.00995 0.00961 0.00868 0.00788 0.00729 0.00669 0.00652
## Cumulative Proportion 0.89318 0.90279 0.91147 0.91935 0.92664 0.93333 0.93985
## PC32 PC33 PC34 PC35 PC36 PC37 PC38
## Standard deviation 0.55491 0.54517 0.52600 0.49787 0.48352 0.47144 0.44215
## Proportion of Variance 0.00616 0.00594 0.00553 0.00496 0.00468 0.00445 0.00391
## Cumulative Proportion 0.94601 0.95196 0.95749 0.96245 0.96712 0.97157 0.97548
## PC39 PC40 PC41 PC42 PC43 PC44 PC45
## Standard deviation 0.41424 0.40245 0.38027 0.35457 0.34884 0.32447 0.29799
## Proportion of Variance 0.00343 0.00324 0.00289 0.00251 0.00243 0.00211 0.00178
## Cumulative Proportion 0.97891 0.98215 0.98504 0.98756 0.98999 0.99210 0.99387
## PC46 PC47 PC48 PC49 PC50
## Standard deviation 0.28162 0.27687 0.24151 0.23198 0.19576
## Proportion of Variance 0.00159 0.00153 0.00117 0.00108 0.00077
## Cumulative Proportion 0.99546 0.99699 0.99816 0.99923 1.00000
#PC1 accounts for 0.252, PC2 accounts for 0.08771. PC3 accounts for 0.0565 for a total of 0.3962 of variance
# Bar plot of the variances of the principal components
plot(pcaX)
# Scores of the individuals on the first two principal components
PC1 <- pcaX$x[, 1]
PC2 <- pcaX$x[, 2]
plot(PC1, PC2)
# Same projection, points coloured according to "infection"
plot(PC1, PC2, main = "Viral or Bacterial Infection", col = viral34_c$infection)
legend("bottomleft", legend = levels(viral34_c$infection), col = 1:2, pch = 1, cex = 0.7)
# There is no clear association between PCA projections and "infection"
# Same projection, points coloured according to "gender"
plot(PC1, PC2, main = "Female or Male", col = viral34_c$gender)
legend("bottomleft", legend = levels(viral34_c$gender), col = 1:2, pch = 1, cex = 0.7)
# There is no clear association between PCA projections and "gender"
# Same projection, points coloured according to "hosp"
plot(PC1, PC2, main = "Hospitalization or No Hospitalization", col = viral34_c$hosp)
legend("bottomleft", legend = levels(viral34_c$hosp), col = 1:2, pch = 1, cex = 0.7)
#There is no clear association between PCA projections and “hosp”
#Plotting the data on the first two principal components and color the points according to “ancestry”
#plot(PC1,PC2, col=viral34_c$ancestry, main = "A, B, or C")
#legend("bottomleft", col=factor(viral34_c$ancestry), legend=levels(viral34_c$ancestry), pch=1, cex=0.7)
#There is no clear association between PCA projections and “ancestry”
# For easier visualisation, switch to PCA() from FactoMineR, which allows
# limiting the number of principal components that are kept.
library("FactoMineR")
# Note: this replaces the prcomp result stored in pcaX
pcaX <- PCA(X, ncp = 3, scale.unit = TRUE) # keep the first 3 PCs
library(factoextra)
# PCA relies on an eigenvalue decomposition. get_eigenvalue() returns, for each
# principal component, its eigenvalue, the percentage of variance explained and
# the cumulative percentage of variance.
eig.val <- get_eigenvalue(pcaX)
head(eig.val)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 12.599 25.198 25.20
## Dim.2 4.385 8.771 33.97
## Dim.3 2.825 5.650 39.62
## Dim.4 2.628 5.256 44.87
## Dim.5 2.289 4.577 49.45
## Dim.6 2.076 4.151 53.60
#Results:
# Interpretation:
# The total variance of a matrix is the sum of the variances of the variables. When variables are scaled, the total variance is equal to the number
# of variables (50 in our case, columns 58:107) since the variance of each (scaled) original variable is 1. The eigenvalue of PC(i) is equal to the variance of PC(i).
# Thus, an eigenvalue larger than 1 indicates that the PC accounts for more variance than one (scaled) original variable. For instance, the first eigenvalue in our dataset is 12.598858, which means that PC1 accounts for as much variation as about 12.6 original variables.
# The proportion of variance explained is obtained by dividing the variance (eigenvalue) by the total variance:
# % variance = 100·eigen_value/total_variance. In our case: % variance explained by PC1 = 100·12.598858/50 = 25.197716%
# Variance explained (scree) plot, with the percentages printed on the bars
fviz_eig(pcaX, ylim = c(0, 30), addlabels = TRUE)
# Plot of the variables
fviz_pca_var(pcaX, col.var = "black")
# get_pca_var() returns a list of matrices with the PCA results for the
# variables, including the contribution of each variable to each PC ($contrib)
var <- get_pca_var(pcaX)
var
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
library(corrplot)
# Rows of var$contrib are ordered by decreasing contribution to PC1 and only the
# top 10 are kept, giving the most relevant genes for PC1.
# (TRUE is written out: the shorthand T is an ordinary variable that can be reassigned.)
corrplot(var$contrib[order(var$contrib[, 1], decreasing = TRUE)[1:10], ], is.corr = FALSE)
#Results: The 10 most important genes for PC1=ALDH4A1, GMPS, RFC4, TSPYL5, NUSAP1, CDC42BPA, ORC6L, AYTL2, C16orf61, FGF18, (COL4A2 in DIM3?)
# Same ordering by contribution, restricted to the 10 most important variables for PC2:
corrplot(var$contrib[order(var$contrib[, 2], decreasing = TRUE)[1:10], ], is.corr = FALSE)
#The 10 most important genes for PC2= SERF1A, WISP1,FLT1,FBXo31,GPR180,ZNF533,EXT1,DCK,ORC6L, Contig40831_RC
# Same ordering by contribution, restricted to the 10 most important variables for PC3:
corrplot(var$contrib[order(var$contrib[, 3], decreasing = TRUE)[1:10], ], is.corr = FALSE)
#The 10 most important genes for PC3= SLC2A3,COL4A2,PECI,GPR126,ZNF533,MMP9,MTDH,Contig63649_RC,MELK, OXCT1
# Bar plot of the 10 variables contributing most to PC1
fviz_contrib(pcaX, choice = "var", axes = 1, top = 10)
# Bar plot of the 10 variables contributing most to PC2
fviz_contrib(pcaX, choice = "var", axes = 2, top = 10)
# Plot of the variables, coloured according to their contrib values
fviz_pca_var(pcaX,
  col.var = "contrib",
  gradient.cols = c(0,0,4,4),#c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)
# PCA results for the individuals (coordinates, cos2 and contributions),
# used for their graphical representation
ind <- get_pca_ind(pcaX)
ind
## Principal Component Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
# Default plot of the individuals on the first two dimensions
fviz_pca_ind(pcaX)
# Infection: concentration ellipses are drawn per infection group to check
# whether the PCA projection of individuals is related to "infection".
# The two ellipses overlap, which means no association between infection type
# and PCA projections according to gene expression profiles.
# (Original author's note on the grouping variable: "color by groups NOT AS FACTOR".)
fviz_pca_ind(pcaX,
  col.ind = viral34_c$infection, # colour by group
  geom.ind = "point", # show points only (but not "text")
  addEllipses = TRUE, # concentration ellipses
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  legend.title = "infection"
)
# Gender: same check with concentration ellipses per gender group.
# Original conclusion: the two ellipses are separated, which implies different
# gene expression profiles between positive and negative individuals; large
# values of PC1 and small values of PC2 are related to gender negative.
# NOTE(review): the "positive/negative" wording does not match the gender
# levels and looks carried over from another example -- confirm against the plot.
fviz_pca_ind(pcaX,
  col.ind = viral34_c$gender, # colour by group
  geom.ind = "point", # show points only (but not "text")
  addEllipses = TRUE, # concentration ellipses
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  legend.title = "gender"
)
# Hospitalization: same check per hosp group. Some separation.
fviz_pca_ind(pcaX,
  col.ind = viral34_c$hosp, # colour by group
  geom.ind = "point", # show points only (but not "text")
  addEllipses = TRUE, # concentration ellipses
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  legend.title = "hosp"
)
# Ancestry: same check per ancestry group. Some separation.
fviz_pca_ind(pcaX,
  col.ind = viral34_c$ancestry, # colour by group
  geom.ind = "point", # show points only (but not "text")
  addEllipses = TRUE, # concentration ellipses
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  legend.title = "ancestry"
)
# QUESTION 6 Perform a nice heatmap with dendrograms for genes and individuals, individuals divided in two groups according to k-means (k=2), and annotations for infection and hospitalization (similar to the one proposed in section 1.4 in “Solutions Exercises section 2”).
# Base-R heatmap of the NON-SCALED gene expression levels (columns 8:57)
heatmap(as.matrix(viral34_c[, 8:57]))
# From here on the Heatmap() function of the ComplexHeatmap package is used
# instead; examples can be found at:
#https://www.datanovia.com/en/lessons/heatmap-in-r-static-and-interactive-visualization/
library("ComplexHeatmap")
## ========================================
## ComplexHeatmap version 2.20.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
# Heatmap of the already-scaled gene expression levels (columns 58:107).
# Individuals (rows) are divided into 2 groups by k-means clustering (km = 2);
# the seed makes that split reproducible.
# Two narrow side heatmaps annotate each individual's infection and hosp value.
# The data frame is converted with as.matrix() before the call, so Heatmap()
# no longer has to convert it itself (which raised the warning "The input is a
# data frame-like object, convert it to a matrix.").
set.seed(1234)
Heatmap(as.matrix(viral34_c[, 58:107]),
  name = "viral34_c", # title of legend
  column_title = "genes", row_title = "individuals",
  row_names_gp = gpar(fontsize = 7), # text size for row names
  km = 2,
  show_row_names = FALSE, show_column_names = TRUE
) +
  Heatmap(viral34_c$infection, name = "infection", width = unit(5, "mm"), col = c(2, 1)) +
  Heatmap(viral34_c$hosp, name = "hosp", width = unit(5, "mm"), col = c(4, 5))
# Same base-R heatmap after transposition (genes as rows, individuals as
# columns), again using the NON-SCALED gene expression levels
heatmap(t(as.matrix(viral34_c[, 8:57])))
#set.seed(1234)
#Heatmap(t(viral34_c[,58:107]),
# name = "viral34_c",
# column_title = "individuals", row_title = "genes",
# row_names_gp = gpar(fontsize = 7),
# km=2,
# show_row_names = FALSE, show_column_names = T
#)+Heatmap(viral34_c$infection, name = "infection", width = unit(5, "mm"), col=c(2,1))+ Heatmap(viral34_c$hosp, name = "hosp", width = unit(5, "mm"), col=c(4,5))
#OBSERVATIONS FROM HEATMAP:
#I observed 4 gene clusters and 2 individual clusters with following observations:
#(LEGEND: GC=GENE CLUSTER, IC=INDIVIDUAL CLUSTER, Over=Over-expressed(red), Under=Underexpressed(blue))
#GC#1(Left of Dendogram): IC#1 (Left of Dendogram)=Over, IC#2=Under
#GC#2: IC#1=Under, IC#2=Over
#GC#3: IC#1=Over, IC#2=Under
#GC#4:For IC#1: DTL=Under, Contig35251_RC=Under, RUNDC1=Over, BBC3=Under, ECT2=Over, MTDH=Under, ZNF533=Over
#GC#4: For IC#2: DTL=Over, (right CONTIG35251_RC=Over, Rest=Under), DCK=Under, MTDH=Over, BBC3=Under,MELK=Over
#IC#1: Left=Viral, Hospitalization; Middle=Bacterial, Right=Viral
#IC#2: (Right=Hospitalization, Viral Infection), Left= Bacterial, Middle: No Hospitalization
# You can see that the last cluster is strongly associated to negative ER and is characterized by low expression of the genes
# on the right and high expression of the rest. The other three cluster mainly correspond to ER positive individuals.
# We can observe that in the first cluster, that is characterized by high expression of the genes on the right and
# low expression of the rest, is the cluster with few events, i.e. this gene expression pattern is associated to better prognosis.
library("glmnet")
library("penalized")
## Welcome to penalized. For extended examples, see vignette("penalized").
# Brief look at the relationships between the genes in columns 8:18.
# Candidate functions; each one fails when called without its main argument:
# PerformanceAnalytics::chart.Correlation() # argument R missing?
# corrr::network_plot() # argument rdf missing?
# psych::pairs.panels() # argument x missing?
# corrplot::corrplot.mixed() # argument corr missing?
# GGally::ggpairs() # argument data missing?
# ggcorrplot::ggcorrplot() # argument corr missing?
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Correlation chart for columns 8:18, with histograms on the diagonal
chart.Correlation(viral34_c[, 8:18], pch = 19, histogram = TRUE)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
library("corrr")
# Correlation network for columns 8:18, drawn with min_cor = 0.6
network_plot(correlate(viral34_c[, 8:18]), min_cor = 0.6)
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
library("dplyr")
# The same network plot, written as a pipeline
viral34_c[8:18] %>%
  correlate() %>%
  network_plot(min_cor = 0.6)
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
# psych provides pairs.panels()
library("psych")
##
## Attaching package: 'psych'
## The following object is masked from 'package:outliers':
##
## outlier
## The following object is masked from 'package:randomForest':
##
## outlier
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Pairs-panel plot for columns 8:18
pairs.panels(viral34_c[8:18], scale = TRUE)
library("corrplot")
# Mixed correlation plot, variables ordered by hierarchical clustering
corrplot.mixed(cor(viral34_c[8:18]), tl.col = "black", order = "hclust")
library("GGally")
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Pairwise plot matrix for columns 8:18
ggpairs(viral34_c[8:18])
# Correlation matrix plot with the coefficients printed as labels
ggcorr(viral34_c[8:18], nbreaks = 8, palette = "RdGy", label = TRUE, label_size = 5, label_color = "white")
library(ggcorrplot)
# The p-value matrix must be computed from the SAME columns as the correlation
# matrix. It was previously computed from the built-in mtcars data set, which
# also has 11 columns, so the call ran but marked significance using p-values
# that have nothing to do with these genes.
ggcorrplot(cor(viral34_c[8:18]), p.mat = cor_pmat(viral34_c[8:18]), hc.order = TRUE, type = "lower")
# QUESTION 7 Test if the mean expression levels of the first gene are different between viral and bacterial infections. An alpha=0.05 is assumed:
# Testing the following hypotheses:
# H0 (null hypothesis): u1 = u2
# H1 (alternative hypothesis): u1 != u2
# First check that gene GSTM3 (column #8) is a continuous, numerical variable
# for which a mean and sd can be calculated.
# In general, variables are scaled for PCA but NOT for t-test, ANOVA, etc., as
# these methods take advantage of the standard deviation of the data to
# implement the analysis.
is.numeric(viral34_c$GSTM3)
## [1] TRUE
# Result: GSTM3 is numeric (TRUE).
# Check whether the non-scaled (raw) expression levels of the first gene follow
# a normal distribution with the Shapiro-Wilk test:
# H0 (null hypothesis): X ~ N(u, sig)
# H1 (alternative hypothesis): X !~ N(u, sig)
shapiro.test(viral34_c$GSTM3)
##
## Shapiro-Wilk normality test
##
## data: viral34_c$GSTM3
## W = 0.97, p-value = 0.004
# Because the p-value = 0.00387 < 0.05, the non-scaled (raw) expression levels
# of the first gene are not normally distributed.
# Check whether the Z-normalized/standardized/scaled expression levels of the
# first gene follow a normal distribution with the Shapiro-Wilk test:
# H0 (null hypothesis): X ~ N(u, sig)
# H1 (alternative hypothesis): X !~ N(u, sig)
shapiro.test(viral34_c$GSTM3_s)
##
## Shapiro-Wilk normality test
##
## data: viral34_c$GSTM3_s
## W = 0.97, p-value = 0.004
# Because the p-value = 0.00387 < 0.05, the Z-normalized/standardized/scaled
# expression levels of the first gene are not normally distributed.
# Check whether the log-transformed expression levels of the first gene follow
# a normal distribution with the Shapiro-Wilk test:
shapiro.test(viral34_c$GSTM3_l)
##
## Shapiro-Wilk normality test
##
## data: viral34_c$GSTM3_l
## W = 0.98, p-value = 0.05
# Because the p-value = 0.0463 <= 0.05 (JUST BARELY!), the log-transformed
# expression levels of the first gene are also not normally distributed.
# Since none of GSTM3, GSTM3_s or GSTM3_l is normally distributed, a Wilcoxon
# rank-sum test is applied, comparing GSTM3 between the infection groups:
wilcox.test(viral34_c$GSTM3 ~ viral34_c$infection)
##
## Wilcoxon rank sum test with continuity correction
##
## data: viral34_c$GSTM3 by viral34_c$infection
## W = 2181, p-value = 0.3
## alternative hypothesis: true location shift is not equal to 0
# Result: p-value = 0.264 > 0.05
# Same Wilcoxon rank-sum test on the scaled expression levels
wilcox.test(viral34_c$GSTM3_s ~ viral34_c$infection)
##
## Wilcoxon rank sum test with continuity correction
##
## data: viral34_c$GSTM3_s by viral34_c$infection
## W = 2181, p-value = 0.3
## alternative hypothesis: true location shift is not equal to 0
# Result: p-value = 0.264 > 0.05
# Same Wilcoxon rank-sum test on the log-transformed expression levels
wilcox.test(viral34_c$GSTM3_l ~ viral34_c$infection)
##
## Wilcoxon rank sum test with continuity correction
##
## data: viral34_c$GSTM3_l by viral34_c$infection
## W = 2181, p-value = 0.3
## alternative hypothesis: true location shift is not equal to 0
#Result: p-value=0.264>0.05
# We therefore DO NOT REJECT the null hypothesis that the means are equal. We conclude that there is NO statistically significant evidence to
#suggest that the mean expression levels of the first gene differ between viral and bacterial infections.
# QUESTION 8 Test if the mean expression levels of the first gene are different among ancestry groups. An alpha=0.05 is assumed:
#Testing the following hypothesis:
#H0=Null Hypothesis: H0: u1=u2=u3 (equality of mean GSTM3 expression levels for all 3 ancestry types)
#H1=Alternative Hypothesis: H1: not all of u1, u2, u3 are equal (at least 1 of the 3 ancestry-type mean GSTM3 expression levels is different)
# Since the GSTM3 gene expression levels were found not to be normally
# distributed, a Kruskal-Wallis rank-sum test is applied across the ancestry
# groups:
kruskal.test(viral34_c$GSTM3 ~ viral34_c$ancestry)
##
## Kruskal-Wallis rank sum test
##
## data: viral34_c$GSTM3 by viral34_c$ancestry
## Kruskal-Wallis chi-squared = 0.46, df = 2, p-value = 0.8
#Results: p-value=0.7959>0.05
#Therefore, there is no statistically significant evidence that the mean GSTM3 expression levels are different for all 3 ancestry types.
#WE do not reject the null hypothesis that these means are equal.
# However, ONLY for demonstration and to be comprehensive here, let's ASSUME that the (log-transformed) GSTM3_l expression levels were found previously to be normally
#distributed (as the p-value from shapiro.test was almost > 0.05). Therefore, in this case, we would perform a
#1-WAY ANOVA test as follows:One-factor ANOVAis as test for association between a continuous variable Y and a factor (categorical variable) X with k categories. The statistical process is derived
#from the decomposition of the total variability in two components: the variability between groups and the variability within groups. Under the ANOVA assumptions, the ratio of the two sources of variability follows the F
#distribution.
# First test whether the variances are equal across ancestry groups
# (homoscedasticity), using the non-studentized Breusch-Pagan test on the
# one-factor linear model.
library(lmtest)
# FALSE is written out: the shorthand F is an ordinary variable that can be reassigned.
bptest(lm(viral34_c$GSTM3_l ~ viral34_c$ancestry), studentize = FALSE)
##
## Breusch-Pagan test
##
## data: lm(viral34_c$GSTM3_l ~ viral34_c$ancestry)
## BP = 4.4, df = 2, p-value = 0.1
#Results: p-value=0.1116>0.05. Therefore, homoscesaticity is fulfilled.
# Then perform the one-factor ANOVA of log-transformed GSTM3 on ancestry and
# print its summary table:
summary(aov(viral34_c$GSTM3_l ~ viral34_c$ancestry))
## Df Sum Sq Mean Sq F value Pr(>F)
## viral34_c$ancestry 2 0.001 0.00027 0.06 0.94
## Residuals 137 0.584 0.00426
#Results: Because Pr(>F)=0.938>0.05, we conclude that there is statistically no significant
#difference in population mean GSTM3 gene expression levels between 3 ancestral groups A, B, C
#The higher the F-value, the lower the corresponding p-value. With p-value > threshold (e.g. alpha=0.05),
#we cannot reject the null hypothesis of the ANOVA and cannot conclude that there is a statistically significant
#difference between ancestry group means.
#We can follow-up by
tapply(viral34_c$GSTM3_l,viral34_c$ancestry, mean)
## A B C
## 1.099 1.100 1.093
#Also:
boxplot(viral34_c$GSTM3_l~viral34_c$ancestry)
#These confirm that the expression means among ancestries are similar
#If we had rejected and found means to be different, then TukeyHSD(anova1factor) can be used:
QUESTION 9 Test whether mean expression levels of the first and second genes are equal for viral infections.An alpha=0.05 is assumed:
##This is paired data since there is a pair of values (gene 1 and gene2) for each individual. Because earlier it was
#shown that Gene 1 expression values are not normally distributed, I will use the wilcox test for testing equality of
#means for paired, non-normally-distributed data as an alternative to the t-test for equality of means for normally distributed data.
#Hypothesis:
#H0: distribution A = Distribution B
#H1: distribution A != Distribution B
#Preliminarily, I tested to see if non-scaled Gene #2 expression values were also normally distributed:
shapiro.test(viral34_c$RP5.860F19.3)
##
## Shapiro-Wilk normality test
##
## data: viral34_c$RP5.860F19.3
## W = 0.98, p-value = 0.02
#Results: Because p-value=0.01577<=0.05, we conclude that Gene#2 Expression values are also not normally distributed
#Furthermore, I tested for correlation between the 2 raw (non-scaled) gene expression levels
#Hypothesis:
#Ho: rho = 0 Gene1 and Gene2 are uncorrelated
#H1: rho not equal 0 Gene1 and Gene2 are correlated
#Using Spearman correlation for non-normally distributed observations
cor(viral34_c$GSTM3,viral34_c$RP5.860F19.3, method="spearman")
## [1] 0.6124
cor.test(viral34_c$GSTM3,viral34_c$RP5.860F19.3,method="spearman")
##
## Spearman's rank correlation rho
##
## data: viral34_c$GSTM3 and viral34_c$RP5.860F19.3
## S = 177236, p-value <2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6124
#Results indicated a p-value<2.2E-16 (<=0.05) and also a high correlation coefficient of 0.612, suggesting that
#both genes are correlated and allowing us to reject null hypothesis that they are uncorrelated
#Finally, I tested for equality of Gene 1 mean expression values to Gene 2 mean expression values for viral infection.
#Because I am only evaluating mean gene 1 and 2 expression values for viral infection, I first created a subset dataframe to
#contain these 2 columns: #slice(), select(1:3)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ psych::alpha() masks ggplot2::alpha()
## ✖ randomForest::combine() masks Biobase::combine(), BiocGenerics::combine(), dplyr::combine()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ purrr::lift() masks caret::lift()
## ✖ randomForest::margin() masks ggplot2::margin()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ MASS::select() masks dplyr::select()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
subviral34 <- viral34_c %>% filter(infection == "viral_infection")
#Now, I test whether non-scaled expression levels of the first and second genes are equal for viral infections using the
#Wilcoxon signed-rank test for paired data (paired = TRUE is spelled out because `T` is a reassignable variable):
wilcox.test(subviral34$GSTM3, subviral34$RP5.860F19.3, paired = TRUE)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subviral34$GSTM3 and subviral34$RP5.860F19.3
## V = 1148, p-value = 0.5
## alternative hypothesis: true location shift is not equal to 0
#Results show that because p-value=0.4581>0.05, we cannot reject the null hypothesis that the
#population mean expression levels of the first and second genes are equal. There is not enough statistically significant
#evidence to suggest that means are different.
QUESTION 10 Perform a nonparametric test for association of the kind of infection (viral or bacterial) and the risk of hospitalization.Provide the OR of the risk of hospitalization for viral vs bacterial infections.
#Using bacterial_infection as reference group (first factor level, based on default alphabetical order), so that the OR is reported for viral vs bacterial infection:
library("epitools")
##
## Attaching package: 'epitools'
## The following object is masked from 'package:survival':
##
## ratetable
# Cross-tabulation: rows (var1) = risk factor (infection type), columns (var2) = hospitalization status.
# NOTE(review): the result is stored under the name `table`, the same name as the base function table();
# calls to table() still resolve to the function, but a more descriptive name would be clearer.
table<-table(viral34_c$infection, viral34_c$hosp)
table
##
## no_hospitalization hospitalization
## bacterial_infection 43 26
## viral_infection 30 41
# OR of the outcome in the 2nd column (hospitalization) for each row category relative to the 1st row (reference group)
oddsratio(table)
## $data
##
## no_hospitalization hospitalization Total
## bacterial_infection 43 26 69
## viral_infection 30 41 71
## Total 73 67 140
##
## $measure
## odds ratio with 95% C.I.
## estimate lower upper
## bacterial_infection 1.000 NA NA
## viral_infection 2.242 1.142 4.473
##
## $p.value
## two-sided
## midp.exact fisher.exact chi.square
## bacterial_infection NA NA NA
## viral_infection 0.0188 0.01904 0.0175
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "median-unbiased estimate & mid-p exact CI"
# 2nd row with respect to reference group (1rst row)
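#Based on results, the Odds Ratio (OR) = 2.242376 (95% CI 1.142-4.473, Fisher exact p-value=0.019) suggests that the odds of hospitalization are 2.24X higher for viral infections compared
#to bacterial infections. Because the 95% CI excludes 1, the association between infection type and hospitalization is statistically significant at alpha=0.05.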
QUESTION 11 Test the normality of expression levels of the 50 genes (use function apply). How many genes are not normally distributed and which are their names?
pvector<-apply(viral34_c[,8:57], 2, function(x) shapiro.test(x)$p.value)
pvector
## GSTM3 RP5.860F19.3 BBC3 MMP9 Contig35251_RC
## 3.870e-03 1.577e-02 3.992e-01 6.833e-01 8.627e-07
## Contig40831_RC ALDH4A1 SERF1A SCUBE2 MTDH
## 6.319e-01 7.705e-01 2.491e-02 2.754e-02 3.443e-01
## DCK FLT1 PECI.1 QSCN6L1 DIAPH3
## 7.808e-03 4.557e-01 1.592e-01 2.166e-03 8.208e-02
## SLC2A3 GPR180 RTN4RL1 Contig32125_RC STK32B
## 5.022e-02 8.169e-02 2.064e-03 8.139e-01 3.931e-01
## EXT1 COL4A2 PECI GNAZ AYTL2
## 8.380e-01 9.376e-01 3.960e-03 7.872e-04 6.778e-04
## Contig63649_RC RAB6B AA555029_RC GPR126 ECT2
## 3.678e-01 5.207e-04 1.692e-03 5.015e-01 1.502e-02
## NUSAP1 GMPS UCHL5 ORC6L TSPYL5
## 9.286e-01 1.914e-02 5.756e-01 1.224e-01 9.740e-01
## MELK RUNDC1 DIAPH3.1 C16orf61 TGFB3
## 1.190e-01 1.782e-01 8.669e-01 4.056e-02 5.729e-01
## FGF18 CDC42BPA DTL WISP1 DIAPH3.2
## 1.450e-01 1.975e-01 2.683e-03 3.204e-01 1.583e-01
## OXCT1 ZNF533 RFC4 KNTC2 FBXO31
## 3.130e-01 7.075e-09 9.092e-02 7.851e-05 2.450e-01
#Counting the genes that are not normally distributed:
length(which(pvector<=0.05))
## [1] 19
#Based on results, there are 19 genes that are not normally distributed
#Getting names of genes that are not normally distributed:
non_normal_genes<-(names(which(pvector<=0.05)))
non_normal_genes
## [1] "GSTM3" "RP5.860F19.3" "Contig35251_RC" "SERF1A"
## [5] "SCUBE2" "DCK" "QSCN6L1" "RTN4RL1"
## [9] "PECI" "GNAZ" "AYTL2" "RAB6B"
## [13] "AA555029_RC" "ECT2" "GMPS" "C16orf61"
## [17] "DTL" "ZNF533" "KNTC2"
QUESTION 12 Identify those genes that are differentially expressed between viral and bacterial infections (use function apply). Create a function that checks whether the gene expression levels are normally distributed or not and, accordingly, applies the most appropriate test for comparing gene expression levels between viral and bacterial infections. Adjust the p-values for multiple testing according to an fdr threshold equal to 0.1. Interpret the results.
#Function will ultimately perform a statistical test for the equality of two means (gene expression levels with viral vs. bacterial infection)
# I tried this alternative with "apply" but was not successful:
#pvector<-apply(viral34_c[,8:57], 2, function(x) shapiro.test(x)$p.value)
# Differential-expression screen between the two levels of df$infection.
#
# For every gene column the function picks the two-sample test that matches the data:
#   * Shapiro-Wilk p-value <= alpha (not normal)       -> Wilcoxon rank-sum test
#   * normal, F-test of variances p-value <= alpha     -> Welch t-test (var.equal = FALSE)
#   * normal, variances not shown to differ            -> Student t-test (var.equal = TRUE)
# The raw p-values are then adjusted with the Benjamini & Hochberg (FDR) procedure.
#
# Arguments:
#   df    data frame with a two-level `infection` column and numeric gene columns.
#   alpha significance level used for the normality and equal-variance pre-tests.
#   cols  indices of the gene columns to test; defaults to 8:57, the range this
#         script previously hard-coded, so existing calls behave as before.
#
# Returns a named numeric vector of FDR-adjusted p-values, one per tested column.
# The vector is in REVERSE column order (last tested gene first); this mirrors the
# ordering the earlier implementation produced, so name-based look-ups and printed
# results are unchanged. Apply the desired FDR cutoff (e.g. 0.1) to the returned values.
#
# NOTE(review): normality is assessed on the pooled samples of both infection groups,
# not within each group - confirm this is the intended pre-test.
diffex <- function(df, alpha, cols = 8:57) {
  group <- df$infection

  # p-value of the most appropriate two-sample test for a single gene column
  test_one_gene <- function(i) {
    expr <- df[[i]]  # [[ ]] always yields a plain vector, also for tibbles
    if (shapiro.test(expr)$p.value <= alpha) {
      return(wilcox.test(expr ~ group)$p.value)
    }
    # Variances are treated as equal unless the F-test rejects equality
    equal_var <- var.test(expr ~ group)$p.value > alpha
    t.test(expr ~ group, var.equal = equal_var)$p.value
  }

  # vapply allocates the result once instead of growing a vector inside a loop
  pfinal <- vapply(cols, test_one_gene, numeric(1))
  names(pfinal) <- colnames(df)[cols]
  pfinal <- rev(pfinal)  # keep the historical (reversed) output order

  # Benjamini & Hochberg multiple-testing correction
  pfinal_fdr <- p.adjust(pfinal, method = "fdr", n = length(pfinal))
  names(pfinal_fdr) <- names(pfinal)
  pfinal_fdr
}
#Note: p.adjust() takes no cutoff argument; the fdr=0.1 threshold has to be applied afterwards by comparing the returned adjusted p-values against 0.1.
#Getting all calculated, adjusted p-values from the statistical tests:
pvalues<-(diffex(viral34_c, 0.05))
pvalues
## FBXO31 KNTC2 RFC4 ZNF533 OXCT1
## 0.9341046 0.6264431 0.1048154 0.8478197 0.4160092
## DIAPH3.2 WISP1 DTL CDC42BPA FGF18
## 0.0008148 0.8345801 0.4097221 0.3086543 0.0615507
## TGFB3 C16orf61 DIAPH3.1 RUNDC1 MELK
## 0.8290377 0.5750973 0.7770385 0.9341046 0.9341046
## TSPYL5 ORC6L UCHL5 GMPS NUSAP1
## 0.0373311 0.1674404 0.9063010 0.4097221 0.1048154
## ECT2 GPR126 AA555029_RC RAB6B Contig63649_RC
## 0.1100606 0.0277589 0.5882743 0.4160092 0.1674404
## AYTL2 GNAZ PECI COL4A2 EXT1
## 0.3086543 0.5882743 0.9341046 0.4160092 0.8339875
## STK32B Contig32125_RC RTN4RL1 GPR180 SLC2A3
## 0.7355522 0.4097221 0.4160092 0.5512460 0.5882743
## DIAPH3 QSCN6L1 PECI.1 FLT1 DCK
## 0.8339875 0.9063010 0.3086543 0.5201829 0.5882743
## MTDH SCUBE2 SERF1A ALDH4A1 Contig40831_RC
## 0.2463937 0.8663983 0.8663983 0.0146467 0.3086543
## Contig35251_RC MMP9 BBC3 RP5.860F19.3 GSTM3
## 0.3086543 0.0027674 0.9063010 0.4632253 0.5076624
#FDR threshold requested by the question; it is applied to the Benjamini & Hochberg adjusted p-values
fdr_threshold <- 0.1
#Getting number of significant results obtained after Benjamini & Hochberg correction
#(reusing `pvalues` computed above instead of re-running diffex for every query):
num_sig <- sum(pvalues <= fdr_threshold)
num_sig
## [1] 6
#Getting names of genes whose non-scaled expression levels are significantly different among infection type:
diff_genes_names <- names(which(pvalues <= fdr_threshold))
diff_genes_names
## [1] "DIAPH3.2" "FGF18" "TSPYL5" "GPR126" "ALDH4A1" "MMP9"
#Based on results, there are 6 such genes at an FDR of 0.1: "DIAPH3.2", "FGF18", "TSPYL5", "GPR126", "ALDH4A1", "MMP9"
#Interpretation: with the FDR controlled at 0.1, about 10% of the genes declared differentially expressed are expected to be false positives.
QUESTION 13 Consider a regression model for the kind of infection as a function of gender, age and ancestry and the first 10 genes (scaled). Use stepwise variable selection and denote the selected model as “best.model”. Interpret the obtained model.
#BACKGROUND: Logistic regression is used with dichotomous dependent variables. A generalized linear model is to be fitted because the response variable "type of infection" is a categorical variable with binomial
#probabilistic outcome (Y=0/Y=1) where the probability is bound by an interval of [0,1], necessitating a logit transformation.
#The FULL fitted model will be obtained first before step-wise variable selection:
library(glmnet)
#In general, it is recommended to center the age predictor by subtracting the mean:
m<-mean(viral34_c$age)
m
## [1] 44.25
##Results: [1] 44.25
c.age<-(viral34_c$age)-m
mean(c.age)
## [1] 0
#This corrected age column is added to the viral34_c dataframe:
viral34_ca<-cbind(viral34_c,c.age)
summary(viral34_ca)
## infection stime sind gender
## bacterial_infection:69 Min. : 0.055 symptoms_remain :93 female:62
## viral_infection :71 1st Qu.: 4.695 symptoms_finished:47 male :78
## Median : 6.962
## Mean : 7.356
## 3rd Qu.:10.057
## Max. :17.659
## hosp age ancestry GSTM3
## no_hospitalization:73 Min. :26.0 Length:140 Min. :-0.3594
## hospitalization :67 1st Qu.:41.0 Class :character 1st Qu.:-0.1455
## Median :45.0 Mode :character Median :-0.0203
## Mean :44.2 Mean : 0.0053
## 3rd Qu.:49.0 3rd Qu.: 0.1233
## Max. :53.0 Max. : 0.5561
## RP5.860F19.3 BBC3 MMP9 Contig35251_RC
## Min. :-0.4242 Min. :-1.0828 Min. :-0.4943 Min. :-0.9177
## 1st Qu.:-0.1072 1st Qu.:-0.3333 1st Qu.:-0.1605 1st Qu.:-0.5925
## Median : 0.0087 Median :-0.0953 Median :-0.0476 Median :-0.4027
## Mean : 0.0156 Mean :-0.1130 Mean :-0.0370 Mean :-0.2517
## 3rd Qu.: 0.1031 3rd Qu.: 0.1110 3rd Qu.: 0.0880 3rd Qu.: 0.0437
## Max. : 0.5938 Max. : 0.6018 Max. : 0.5168 Max. : 0.9944
## Contig40831_RC ALDH4A1 SERF1A SCUBE2
## Min. :-0.4715 Min. :-0.7679 Min. :-0.5563 Min. :-0.5152
## 1st Qu.:-0.1256 1st Qu.:-0.1749 1st Qu.:-0.0984 1st Qu.:-0.1291
## Median : 0.0270 Median :-0.0041 Median : 0.0049 Median :-0.0226
## Mean : 0.0055 Mean :-0.0277 Mean :-0.0070 Mean :-0.0243
## 3rd Qu.: 0.1225 3rd Qu.: 0.1378 3rd Qu.: 0.0900 3rd Qu.: 0.0749
## Max. : 0.4185 Max. : 0.6030 Max. : 0.3561 Max. : 0.4372
## MTDH DCK FLT1 PECI.1
## Min. :-0.6756 Min. :-0.909 Min. :-0.4826 Min. :-0.4336
## 1st Qu.:-0.2933 1st Qu.:-0.529 1st Qu.:-0.1008 1st Qu.:-0.1396
## Median :-0.0834 Median :-0.340 Median : 0.0189 Median :-0.0403
## Mean :-0.0867 Mean :-0.321 Mean :-0.0005 Mean :-0.0336
## 3rd Qu.: 0.0738 3rd Qu.:-0.160 3rd Qu.: 0.0897 3rd Qu.: 0.0588
## Max. : 0.6406 Max. : 0.599 Max. : 0.5083 Max. : 0.5128
## QSCN6L1 DIAPH3 SLC2A3 GPR180
## Min. :-0.3794 Min. :-0.4493 Min. :-0.3716 Min. :-0.3552
## 1st Qu.:-0.0466 1st Qu.:-0.1120 1st Qu.:-0.0777 1st Qu.:-0.0803
## Median : 0.0078 Median :-0.0058 Median : 0.0005 Median :-0.0206
## Mean : 0.0217 Mean :-0.0109 Mean : 0.0114 Mean :-0.0137
## 3rd Qu.: 0.0981 3rd Qu.: 0.0992 3rd Qu.: 0.0806 3rd Qu.: 0.0598
## Max. : 0.5401 Max. : 0.3549 Max. : 0.4642 Max. : 0.3306
## RTN4RL1 Contig32125_RC STK32B EXT1
## Min. :-0.6646 Min. :-0.5321 Min. :-0.4804 Min. :-0.4778
## 1st Qu.:-0.2055 1st Qu.:-0.1135 1st Qu.:-0.1429 1st Qu.:-0.1675
## Median : 0.0046 Median :-0.0090 Median :-0.0235 Median :-0.0558
## Mean :-0.0414 Mean :-0.0110 Mean :-0.0412 Mean :-0.0519
## 3rd Qu.: 0.1318 3rd Qu.: 0.0734 3rd Qu.: 0.0449 3rd Qu.: 0.0605
## Max. : 0.4281 Max. : 0.4563 Max. : 0.4580 Max. : 0.3741
## COL4A2 PECI GNAZ AYTL2
## Min. :-0.5987 Min. :-0.4423 Min. :-0.3175 Min. :-0.6943
## 1st Qu.:-0.1979 1st Qu.:-0.1942 1st Qu.:-0.0956 1st Qu.:-0.1319
## Median :-0.0528 Median :-0.0637 Median :-0.0164 Median :-0.0460
## Mean :-0.0596 Mean :-0.0373 Mean : 0.0101 Mean :-0.0252
## 3rd Qu.: 0.0627 3rd Qu.: 0.0966 3rd Qu.: 0.0834 3rd Qu.: 0.0654
## Max. : 0.5602 Max. : 0.6090 Max. : 0.4306 Max. : 0.5336
## Contig63649_RC RAB6B AA555029_RC GPR126
## Min. :-0.3654 Min. :-0.5692 Min. :-0.431 Min. :-0.3797
## 1st Qu.:-0.0984 1st Qu.:-0.1431 1st Qu.:-0.160 1st Qu.:-0.1361
## Median :-0.0249 Median :-0.0522 Median :-0.001 Median :-0.0105
## Mean :-0.0094 Mean :-0.0172 Mean :-0.021 Mean :-0.0164
## 3rd Qu.: 0.0900 3rd Qu.: 0.0896 3rd Qu.: 0.107 3rd Qu.: 0.0978
## Max. : 0.3205 Max. : 0.4946 Max. : 0.820 Max. : 0.4393
## ECT2 NUSAP1 GMPS UCHL5
## Min. :-0.5077 Min. :-0.5863 Min. :-0.5915 Min. :-0.4585
## 1st Qu.:-0.2311 1st Qu.:-0.1607 1st Qu.:-0.2841 1st Qu.:-0.1311
## Median :-0.0813 Median :-0.0093 Median :-0.0451 Median :-0.0386
## Mean :-0.0500 Mean :-0.0029 Mean :-0.0605 Mean :-0.0242
## 3rd Qu.: 0.0984 3rd Qu.: 0.1504 3rd Qu.: 0.1528 3rd Qu.: 0.0921
## Max. : 0.7757 Max. : 0.6765 Max. : 0.5519 Max. : 0.5607
## ORC6L TSPYL5 MELK RUNDC1
## Min. :-0.7968 Min. :-0.6789 Min. :-0.7898 Min. :-0.870
## 1st Qu.:-0.2140 1st Qu.:-0.1786 1st Qu.:-0.1895 1st Qu.:-0.331
## Median :-0.0244 Median :-0.0244 Median :-0.0611 Median :-0.118
## Mean :-0.0517 Mean :-0.0320 Mean :-0.0493 Mean :-0.106
## 3rd Qu.: 0.1501 3rd Qu.: 0.1313 3rd Qu.: 0.0744 3rd Qu.: 0.104
## Max. : 0.5067 Max. : 0.6178 Max. : 0.8189 Max. : 0.753
## DIAPH3.1 C16orf61 TGFB3 FGF18
## Min. :-0.7682 Min. :-0.6119 Min. :-0.4152 Min. :-0.5978
## 1st Qu.:-0.2564 1st Qu.:-0.1889 1st Qu.:-0.0924 1st Qu.:-0.1404
## Median :-0.0683 Median :-0.0931 Median :-0.0053 Median : 0.0015
## Mean :-0.0539 Mean :-0.0591 Mean :-0.0023 Mean :-0.0232
## 3rd Qu.: 0.1179 3rd Qu.: 0.0587 3rd Qu.: 0.0827 3rd Qu.: 0.1070
## Max. : 0.7049 Max. : 0.5941 Max. : 0.4397 Max. : 0.4822
## CDC42BPA DTL WISP1 DIAPH3.2
## Min. :-0.4444 Min. :-1.264 Min. :-0.4404 Min. :-0.4510
## 1st Qu.:-0.1519 1st Qu.:-0.651 1st Qu.:-0.0876 1st Qu.:-0.1221
## Median :-0.0436 Median :-0.153 Median : 0.0240 Median : 0.0088
## Mean :-0.0264 Mean :-0.209 Mean : 0.0131 Mean :-0.0009
## 3rd Qu.: 0.0804 3rd Qu.: 0.203 3rd Qu.: 0.1223 3rd Qu.: 0.1127
## Max. : 0.4842 Max. : 0.892 Max. : 0.3755 Max. : 0.3669
## OXCT1 ZNF533 RFC4 KNTC2
## Min. :-0.4278 Min. :-0.5109 Min. :-0.5636 Min. :-0.4311
## 1st Qu.:-0.0905 1st Qu.:-0.2613 1st Qu.:-0.0825 1st Qu.:-0.1841
## Median : 0.0095 Median :-0.1380 Median :-0.0010 Median :-0.0616
## Mean : 0.0161 Mean :-0.0593 Mean : 0.0080 Mean :-0.0359
## 3rd Qu.: 0.1234 3rd Qu.: 0.0381 3rd Qu.: 0.1045 3rd Qu.: 0.0722
## Max. : 0.6491 Max. : 0.8648 Max. : 0.4791 Max. : 0.5975
## FBXO31 GSTM3_s RP5.860F19.3_s BBC3_s
## Min. :-0.4215 Min. :-1.845 Min. :-2.2232 Min. :-2.9155
## 1st Qu.:-0.1388 1st Qu.:-0.763 1st Qu.:-0.6210 1st Qu.:-0.6625
## Median :-0.0451 Median :-0.130 Median :-0.0348 Median : 0.0531
## Mean :-0.0253 Mean : 0.000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.0860 3rd Qu.: 0.597 3rd Qu.: 0.4423 3rd Qu.: 0.6732
## Max. : 0.5556 Max. : 2.786 Max. : 2.9235 Max. : 2.1487
## MMP9_s Contig35251_RC_s Contig40831_RC_s ALDH4A1_s
## Min. :-2.1926 Min. :-1.534 Min. :-2.653 Min. :-3.322
## 1st Qu.:-0.5924 1st Qu.:-0.785 1st Qu.:-0.729 1st Qu.:-0.661
## Median :-0.0509 Median :-0.348 Median : 0.120 Median : 0.106
## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.5992 3rd Qu.: 0.680 3rd Qu.: 0.651 3rd Qu.: 0.743
## Max. : 2.6553 Max. : 2.870 Max. : 2.296 Max. : 2.831
## SERF1A_s SCUBE2_s MTDH_s DCK_s
## Min. :-3.214 Min. :-3.1470 Min. :-2.1023 Min. :-2.120
## 1st Qu.:-0.534 1st Qu.:-0.6724 1st Qu.:-0.7373 1st Qu.:-0.748
## Median : 0.070 Median : 0.0104 Median : 0.0118 Median :-0.066
## Mean : 0.000 Mean : 0.0000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.568 3rd Qu.: 0.6356 3rd Qu.: 0.5733 3rd Qu.: 0.584
## Max. : 2.125 Max. : 2.9577 Max. : 2.5964 Max. : 3.320
## FLT1_s PECI.1_s QSCN6L1_s DIAPH3_s
## Min. :-3.057 Min. :-2.456 Min. :-3.015 Min. :-2.5771
## 1st Qu.:-0.636 1st Qu.:-0.651 1st Qu.:-0.513 1st Qu.:-0.5946
## Median : 0.123 Median :-0.041 Median :-0.105 Median : 0.0302
## Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean : 0.0000
## 3rd Qu.: 0.572 3rd Qu.: 0.568 3rd Qu.: 0.575 3rd Qu.: 0.6471
## Max. : 3.227 Max. : 3.356 Max. : 3.897 Max. : 2.1500
## SLC2A3_s GPR180_s RTN4RL1_s Contig32125_RC_s
## Min. :-2.4430 Min. :-2.638 Min. :-2.503 Min. :-3.357
## 1st Qu.:-0.5685 1st Qu.:-0.515 1st Qu.:-0.659 1st Qu.:-0.660
## Median :-0.0693 Median :-0.053 Median : 0.185 Median : 0.013
## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.4415 3rd Qu.: 0.568 3rd Qu.: 0.696 3rd Qu.: 0.544
## Max. : 2.8891 Max. : 2.659 Max. : 1.886 Max. : 3.011
## STK32B_s EXT1_s COL4A2_s PECI_s
## Min. :-2.711 Min. :-2.4843 Min. :-2.6489 Min. :-1.895
## 1st Qu.:-0.627 1st Qu.:-0.6743 1st Qu.:-0.6795 1st Qu.:-0.734
## Median : 0.110 Median :-0.0225 Median : 0.0334 Median :-0.124
## Mean : 0.000 Mean : 0.0000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.531 3rd Qu.: 0.6559 3rd Qu.: 0.6012 3rd Qu.: 0.626
## Max. : 3.082 Max. : 2.4850 Max. : 3.0457 Max. : 3.023
## GNAZ_s AYTL2_s Contig63649_RC_s RAB6B_s
## Min. :-2.167 Min. :-3.980 Min. :-2.573 Min. :-2.710
## 1st Qu.:-0.700 1st Qu.:-0.635 1st Qu.:-0.643 1st Qu.:-0.618
## Median :-0.175 Median :-0.124 Median :-0.112 Median :-0.172
## Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.485 3rd Qu.: 0.539 3rd Qu.: 0.718 3rd Qu.: 0.524
## Max. : 2.782 Max. : 3.323 Max. : 2.384 Max. : 2.513
## AA555029_RC_s GPR126_s ECT2_s NUSAP1_s
## Min. :-2.190 Min. :-2.1825 Min. :-1.917 Min. :-2.4885
## 1st Qu.:-0.743 1st Qu.:-0.7189 1st Qu.:-0.759 1st Qu.:-0.6731
## Median : 0.106 Median : 0.0356 Median :-0.131 Median :-0.0273
## Mean : 0.000 Mean : 0.0000 Mean : 0.000 Mean : 0.0000
## 3rd Qu.: 0.687 3rd Qu.: 0.6862 3rd Qu.: 0.621 3rd Qu.: 0.6540
## Max. : 4.495 Max. : 2.7371 Max. : 3.458 Max. : 2.8981
## GMPS_s UCHL5_s ORC6L_s TSPYL5_s
## Min. :-2.0588 Min. :-2.657 Min. :-2.786 Min. :-2.7252
## 1st Qu.:-0.8669 1st Qu.:-0.654 1st Qu.:-0.607 1st Qu.:-0.6176
## Median : 0.0594 Median :-0.088 Median : 0.102 Median : 0.0318
## Mean : 0.0000 Mean : 0.000 Mean : 0.000 Mean : 0.0000
## 3rd Qu.: 0.8269 3rd Qu.: 0.711 3rd Qu.: 0.754 3rd Qu.: 0.6877
## Max. : 2.3741 Max. : 3.578 Max. : 2.088 Max. : 2.7375
## MELK_s RUNDC1_s DIAPH3.1_s C16orf61_s
## Min. :-3.193 Min. :-2.5022 Min. :-2.6615 Min. :-2.860
## 1st Qu.:-0.604 1st Qu.:-0.7355 1st Qu.:-0.7545 1st Qu.:-0.672
## Median :-0.051 Median :-0.0409 Median :-0.0537 Median :-0.176
## Mean : 0.000 Mean : 0.0000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.533 3rd Qu.: 0.6859 3rd Qu.: 0.6400 3rd Qu.: 0.609
## Max. : 3.744 Max. : 2.8101 Max. : 2.8273 Max. : 3.380
## TGFB3_s FGF18_s CDC42BPA_s DTL_s
## Min. :-2.8862 Min. :-2.939 Min. :-2.4108 Min. :-2.006
## 1st Qu.:-0.6297 1st Qu.:-0.600 1st Qu.:-0.7237 1st Qu.:-0.839
## Median :-0.0212 Median : 0.126 Median :-0.0991 Median : 0.107
## Mean : 0.0000 Mean : 0.000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.5942 3rd Qu.: 0.666 3rd Qu.: 0.6162 3rd Qu.: 0.785
## Max. : 3.0889 Max. : 2.585 Max. : 2.9450 Max. : 2.095
## WISP1_s DIAPH3.2_s OXCT1_s ZNF533_s
## Min. :-2.8269 Min. :-2.7614 Min. :-2.435 Min. :-1.565
## 1st Qu.:-0.6278 1st Qu.:-0.7434 1st Qu.:-0.585 1st Qu.:-0.700
## Median : 0.0679 Median : 0.0598 Median :-0.036 Median :-0.273
## Mean : 0.0000 Mean : 0.0000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.6807 3rd Qu.: 0.6967 3rd Qu.: 0.588 3rd Qu.: 0.337
## Max. : 2.2588 Max. : 2.2564 Max. : 3.472 Max. : 3.203
## RFC4_s KNTC2_s FBXO31_s GSTM3_l
## Min. :-3.620 Min. :-1.978 Min. :-2.362 Min. :0.971
## 1st Qu.:-0.573 1st Qu.:-0.742 1st Qu.:-0.676 1st Qu.:1.049
## Median :-0.057 Median :-0.129 Median :-0.117 Median :1.092
## Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean :1.098
## 3rd Qu.: 0.611 3rd Qu.: 0.541 3rd Qu.: 0.664 3rd Qu.:1.139
## Max. : 2.983 Max. : 3.170 Max. : 3.463 Max. :1.269
## RP5.860F19.3_l BBC3_l MMP9_l Contig35251_RC_l
## Min. :0.946 Min. :0.651 Min. :0.919 Min. :0.734
## 1st Qu.:1.062 1st Qu.:0.981 1st Qu.:1.044 1st Qu.:0.879
## Median :1.101 Median :1.066 Median :1.083 Median :0.955
## Mean :1.102 Mean :1.053 Mean :1.084 Mean :0.999
## 3rd Qu.:1.132 3rd Qu.:1.135 3rd Qu.:1.127 3rd Qu.:1.113
## Max. :1.279 Max. :1.281 Max. :1.258 Max. :1.385
## Contig40831_RC_l ALDH4A1_l SERF1A_l SCUBE2_l
## Min. :0.928 Min. :0.803 Min. :0.893 Min. :0.91
## 1st Qu.:1.056 1st Qu.:1.038 1st Qu.:1.065 1st Qu.:1.05
## Median :1.108 Median :1.097 Median :1.100 Median :1.09
## Mean :1.099 Mean :1.087 Mean :1.095 Mean :1.09
## 3rd Qu.:1.139 3rd Qu.:1.143 3rd Qu.:1.128 3rd Qu.:1.12
## Max. :1.229 Max. :1.282 Max. :1.211 Max. :1.23
## MTDH_l DCK_l FLT1_l PECI.1_l
## Min. :0.843 Min. :0.738 Min. :0.923 Min. :0.942
## 1st Qu.:0.996 1st Qu.:0.905 1st Qu.:1.064 1st Qu.:1.051
## Median :1.070 Median :0.978 Median :1.105 Median :1.085
## Mean :1.065 Mean :0.980 Mean :1.097 Mean :1.086
## 3rd Qu.:1.123 3rd Qu.:1.044 3rd Qu.:1.128 3rd Qu.:1.118
## Max. :1.292 Max. :1.280 Max. :1.255 Max. :1.256
## QSCN6L1_l DIAPH3_l SLC2A3_l GPR180_l
## Min. :0.963 Min. :0.936 Min. :0.966 Min. :0.973
## 1st Qu.:1.083 1st Qu.:1.060 1st Qu.:1.072 1st Qu.:1.071
## Median :1.101 Median :1.097 Median :1.099 Median :1.092
## Mean :1.105 Mean :1.093 Mean :1.101 Mean :1.093
## 3rd Qu.:1.131 3rd Qu.:1.131 3rd Qu.:1.125 3rd Qu.:1.118
## Max. :1.264 Max. :1.210 Max. :1.242 Max. :1.203
## RTN4RL1_l Contig32125_RC_l STK32B_l EXT1_l
## Min. :0.848 Min. :0.903 Min. :0.924 Min. :0.925
## 1st Qu.:1.028 1st Qu.:1.060 1st Qu.:1.050 1st Qu.:1.041
## Median :1.100 Median :1.096 Median :1.091 Median :1.080
## Mean :1.081 Mean :1.094 Mean :1.083 Mean :1.079
## 3rd Qu.:1.142 3rd Qu.:1.123 3rd Qu.:1.113 3rd Qu.:1.119
## Max. :1.232 Max. :1.240 Max. :1.241 Max. :1.216
## COL4A2_l PECI_l GNAZ_l AYTL2_l
## Min. :0.876 Min. :0.939 Min. :0.987 Min. :0.835
## 1st Qu.:1.030 1st Qu.:1.032 1st Qu.:1.066 1st Qu.:1.054
## Median :1.081 Median :1.077 Median :1.093 Median :1.083
## Mean :1.076 Mean :1.084 Mean :1.101 Mean :1.089
## 3rd Qu.:1.119 3rd Qu.:1.130 3rd Qu.:1.126 3rd Qu.:1.120
## Max. :1.270 Max. :1.283 Max. :1.233 Max. :1.262
## Contig63649_RC_l RAB6B_l AA555029_RC_l GPR126_l
## Min. :0.969 Min. :0.888 Min. :0.944 Min. :0.963
## 1st Qu.:1.065 1st Qu.:1.050 1st Qu.:1.044 1st Qu.:1.052
## Median :1.090 Median :1.081 Median :1.098 Median :1.095
## Mean :1.094 Mean :1.091 Mean :1.090 Mean :1.092
## 3rd Qu.:1.128 3rd Qu.:1.128 3rd Qu.:1.134 3rd Qu.:1.131
## Max. :1.200 Max. :1.251 Max. :1.340 Max. :1.235
## ECT2_l NUSAP1_l GMPS_l UCHL5_l ORC6L_l
## Min. :0.913 Min. :0.881 Min. :0.879 Min. :0.933 Min. :0.79
## 1st Qu.:1.018 1st Qu.:1.044 1st Qu.:0.999 1st Qu.:1.054 1st Qu.:1.02
## Median :1.071 Median :1.095 Median :1.083 Median :1.086 Median :1.09
## Mean :1.079 Mean :1.095 Mean :1.074 Mean :1.089 Mean :1.08
## 3rd Qu.:1.131 3rd Qu.:1.147 3rd Qu.:1.148 3rd Qu.:1.129 3rd Qu.:1.15
## Max. :1.329 Max. :1.302 Max. :1.268 Max. :1.270 Max. :1.25
## TSPYL5_l MELK_l RUNDC1_l DIAPH3.1_l
## Min. :0.842 Min. :0.793 Min. :0.756 Min. :0.803
## 1st Qu.:1.037 1st Qu.:1.033 1st Qu.:0.982 1st Qu.:1.009
## Median :1.090 Median :1.078 Median :1.058 Median :1.076
## Mean :1.085 Mean :1.079 Mean :1.057 Mean :1.076
## 3rd Qu.:1.141 3rd Qu.:1.123 3rd Qu.:1.133 3rd Qu.:1.137
## Max. :1.286 Max. :1.340 Max. :1.323 Max. :1.310
## C16orf61_l TGFB3_l FGF18_l CDC42BPA_l DTL_l
## Min. :0.871 Min. :0.95 Min. :0.876 Min. :0.938 Min. :0.551
## 1st Qu.:1.034 1st Qu.:1.07 1st Qu.:1.051 1st Qu.:1.047 1st Qu.:0.854
## Median :1.067 Median :1.10 Median :1.099 Median :1.084 Median :1.046
## Mean :1.077 Mean :1.10 Mean :1.089 Mean :1.088 Mean :1.007
## 3rd Qu.:1.118 3rd Qu.:1.13 3rd Qu.:1.134 3rd Qu.:1.125 3rd Qu.:1.164
## Max. :1.279 Max. :1.24 Max. :1.248 Max. :1.248 Max. :1.359
## WISP1_l DIAPH3.2_l OXCT1_l ZNF533_l RFC4_l
## Min. :0.94 Min. :0.936 Min. :0.945 Min. :0.912 Min. :0.89
## 1st Qu.:1.07 1st Qu.:1.057 1st Qu.:1.068 1st Qu.:1.008 1st Qu.:1.07
## Median :1.11 Median :1.102 Median :1.102 Median :1.052 Median :1.10
## Mean :1.10 Mean :1.097 Mean :1.102 Mean :1.074 Mean :1.10
## 3rd Qu.:1.14 3rd Qu.:1.135 3rd Qu.:1.139 3rd Qu.:1.111 3rd Qu.:1.13
## Max. :1.22 Max. :1.214 Max. :1.294 Max. :1.352 Max. :1.25
## KNTC2_l FBXO31_l c.age
## Min. :0.944 Min. :0.947 Min. :-18.25
## 1st Qu.:1.035 1st Qu.:1.051 1st Qu.: -3.25
## Median :1.078 Median :1.083 Median : 0.75
## Mean :1.084 Mean :1.089 Mean : 0.00
## 3rd Qu.:1.122 3rd Qu.:1.127 3rd Qu.: 4.75
## Max. :1.280 Max. :1.268 Max. : 8.75
dim(viral34_ca)
## [1] 140 158
#Assigning the dependent, factored categorical variable "infection type" to a variable Y
Y<-viral34_c$infection
Y
## [1] bacterial_infection viral_infection bacterial_infection
## [4] viral_infection viral_infection viral_infection
## [7] bacterial_infection bacterial_infection viral_infection
## [10] bacterial_infection viral_infection viral_infection
## [13] viral_infection bacterial_infection bacterial_infection
## [16] bacterial_infection viral_infection bacterial_infection
## [19] bacterial_infection viral_infection bacterial_infection
## [22] viral_infection bacterial_infection viral_infection
## [25] bacterial_infection bacterial_infection bacterial_infection
## [28] bacterial_infection bacterial_infection bacterial_infection
## [31] bacterial_infection bacterial_infection bacterial_infection
## [34] viral_infection viral_infection viral_infection
## [37] viral_infection viral_infection viral_infection
## [40] bacterial_infection viral_infection bacterial_infection
## [43] viral_infection viral_infection viral_infection
## [46] viral_infection viral_infection bacterial_infection
## [49] viral_infection viral_infection bacterial_infection
## [52] viral_infection bacterial_infection bacterial_infection
## [55] viral_infection bacterial_infection bacterial_infection
## [58] bacterial_infection bacterial_infection viral_infection
## [61] viral_infection viral_infection viral_infection
## [64] viral_infection bacterial_infection viral_infection
## [67] viral_infection bacterial_infection bacterial_infection
## [70] viral_infection bacterial_infection viral_infection
## [73] viral_infection viral_infection viral_infection
## [76] bacterial_infection bacterial_infection viral_infection
## [79] viral_infection bacterial_infection bacterial_infection
## [82] bacterial_infection viral_infection viral_infection
## [85] bacterial_infection viral_infection viral_infection
## [88] viral_infection viral_infection bacterial_infection
## [91] bacterial_infection viral_infection bacterial_infection
## [94] bacterial_infection viral_infection viral_infection
## [97] viral_infection bacterial_infection bacterial_infection
## [100] viral_infection bacterial_infection bacterial_infection
## [103] viral_infection bacterial_infection viral_infection
## [106] viral_infection viral_infection bacterial_infection
## [109] bacterial_infection viral_infection bacterial_infection
## [112] viral_infection bacterial_infection viral_infection
## [115] viral_infection bacterial_infection viral_infection
## [118] bacterial_infection bacterial_infection viral_infection
## [121] viral_infection viral_infection bacterial_infection
## [124] viral_infection bacterial_infection bacterial_infection
## [127] bacterial_infection bacterial_infection viral_infection
## [130] bacterial_infection bacterial_infection viral_infection
## [133] viral_infection bacterial_infection viral_infection
## [136] bacterial_infection bacterial_infection bacterial_infection
## [139] bacterial_infection viral_infection
## Levels: bacterial_infection viral_infection
#Index Directory of Co-Variate Columns in dataframe viral34_ca
# gender=4
# age=6
# ancestry=7,
# First scaled 10 genes=58-67
# c.age=158 (last column; dim(viral34_ca) is 140 x 158 - column 148 is FGF18_l)
#Obtaining first the FULL logistic model with “infection” as dependent variable and variables (including UNCORRECTED age) as covariates:
model1<-glm(Y~., data=viral34_c[,c(4,6,7,58:67)],family="binomial")
model1
##
## Call: glm(formula = Y ~ ., family = "binomial", data = viral34_c[,
## c(4, 6, 7, 58:67)])
##
## Coefficients:
## (Intercept) gendermale age ancestryB
## 0.776969 -0.706281 -0.000755 -0.661874
## ancestryC GSTM3_s RP5.860F19.3_s BBC3_s
## -1.374640 -0.180613 0.111857 -0.207958
## MMP9_s Contig35251_RC_s Contig40831_RC_s ALDH4A1_s
## 1.424142 0.229409 0.142125 1.601389
## SERF1A_s SCUBE2_s MTDH_s
## 0.116305 -0.281441 -0.009742
##
## Degrees of Freedom: 139 Total (i.e. Null); 125 Residual
## Null Deviance: 194
## Residual Deviance: 140 AIC: 170
summary(model1)
##
## Call:
## glm(formula = Y ~ ., family = "binomial", data = viral34_c[,
## c(4, 6, 7, 58:67)])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.776969 1.782134 0.44 0.663
## gendermale -0.706281 0.455474 -1.55 0.121
## age -0.000755 0.040859 -0.02 0.985
## ancestryB -0.661874 0.570617 -1.16 0.246
## ancestryC -1.374640 0.723780 -1.90 0.058 .
## GSTM3_s -0.180613 0.290497 -0.62 0.534
## RP5.860F19.3_s 0.111857 0.296742 0.38 0.706
## BBC3_s -0.207958 0.226570 -0.92 0.359
## MMP9_s 1.424142 0.332779 4.28 1.9e-05 ***
## Contig35251_RC_s 0.229409 0.243580 0.94 0.346
## Contig40831_RC_s 0.142125 0.243031 0.58 0.559
## ALDH4A1_s 1.601389 0.385158 4.16 3.2e-05 ***
## SERF1A_s 0.116305 0.265279 0.44 0.661
## SCUBE2_s -0.281441 0.251780 -1.12 0.264
## MTDH_s -0.009742 0.264942 -0.04 0.971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.05 on 139 degrees of freedom
## Residual deviance: 140.11 on 125 degrees of freedom
## AIC: 170.1
##
## Number of Fisher Scoring iterations: 5
# Null deviance: 194.05 on 139 degrees of freedom
# Residual deviance: 140.11 on 125 degrees of freedom
# AIC: 170.11
#Obtaining first the FULL logistic model with “infection” as dependent variable and variables (including CORRECTED age) as covariates:
# Second full model, fitted on viral34_ca with column 148 in place of column 6.
# NOTE(review): the printed coefficients list FGF18_l for this column rather
# than an age variable -- confirm that index 148 really is the corrected age.
model1a <- glm(
  formula = Y ~ .,
  family = "binomial",
  data = viral34_ca[, c(4, 148, 7, 58:67)]
)
print(model1a)
##
## Call: glm(formula = Y ~ ., family = "binomial", data = viral34_ca[,
## c(4, 148, 7, 58:67)])
##
## Coefficients:
## (Intercept) gendermale FGF18_l ancestryB
## -6.1156 -0.8018 6.3427 -0.6907
## ancestryC GSTM3_s RP5.860F19.3_s BBC3_s
## -1.3728 -0.2650 0.0970 -0.1399
## MMP9_s Contig35251_RC_s Contig40831_RC_s ALDH4A1_s
## 1.4432 0.2498 0.1848 1.4022
## SERF1A_s SCUBE2_s MTDH_s
## 0.1195 -0.3329 -0.0127
##
## Degrees of Freedom: 139 Total (i.e. Null); 125 Residual
## Null Deviance: 194
## Residual Deviance: 138 AIC: 168
# Coefficient table (Wald z-tests), deviances and AIC of the second full model.
print(summary(model1a))
##
## Call:
## glm(formula = Y ~ ., family = "binomial", data = viral34_ca[,
## c(4, 148, 7, 58:67)])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.1156 5.3325 -1.15 0.25144
## gendermale -0.8018 0.4616 -1.74 0.08238 .
## FGF18_l 6.3427 4.9228 1.29 0.19759
## ancestryB -0.6907 0.5761 -1.20 0.23057
## ancestryC -1.3728 0.7322 -1.87 0.06081 .
## GSTM3_s -0.2650 0.2997 -0.88 0.37668
## RP5.860F19.3_s 0.0970 0.3036 0.32 0.74938
## BBC3_s -0.1399 0.2343 -0.60 0.55035
## MMP9_s 1.4432 0.3360 4.29 1.7e-05 ***
## Contig35251_RC_s 0.2498 0.2414 1.03 0.30075
## Contig40831_RC_s 0.1848 0.2436 0.76 0.44810
## ALDH4A1_s 1.4022 0.4116 3.41 0.00066 ***
## SERF1A_s 0.1195 0.2560 0.47 0.64068
## SCUBE2_s -0.3329 0.2549 -1.31 0.19147
## MTDH_s -0.0127 0.2652 -0.05 0.96190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.05 on 139 degrees of freedom
## Residual deviance: 138.41 on 125 degrees of freedom
## AIC: 168.4
##
## Number of Fisher Scoring iterations: 5
# Null deviance: 194.05 on 139 degrees of freedom
# Residual deviance: 138.41 on 125 degrees of freedom
# AIC: 168.41
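#Comparing these two FULL models, the deviance and AIC were both lower after "correcting" for age (138.41 vs 140.11 and 168.41 vs 170.11).
#Age correction would therefore be preferred for subsequent variable selection; note, however, that the step() calls below start from model1 (uncorrected age):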
#When the number of variables is large, stepwise selection of variables is used to remove those variables that are not related to the outcome:
#Given a multivariate FULL, age-adjusted logistic model, Forward selection, Backward elimination, and Stepwise regression variable selection
#will be used to remove variables that are unimportant and unassociated with the infection-type response variable:
#Comparing the fit of different models requires adjusting for the number k of covariates in the model, because the largest or
#most complex model always provides the smallest deviance (largest R2). Adjusted measures include:
#AIC (Akaike information criterion): Deviance+2(k+1)
#BIC (Bayesian information criterion): Deviance+ ln(n)*(k+1)
#The model with the smallest AIC or BIC will be chosen as best.model for Problem#14 Validation
#Variable Selection:
#Performing forward selection:
# Forward selection must start from the intercept-only model and be given the
# full model as the upper scope. Calling step() on the full model with
# direction = "forward" and no scope cannot add anything, so it simply returns
# the full model unchanged (AIC stays at 170.1).
# NOTE(review): the trace and summary recorded below were produced by the
# original no-op call and need to be regenerated after this change.
null_model1 <- glm(
  formula = Y ~ 1,
  family = "binomial",
  data = viral34_c[, c(4, 6, 7, 58:67)]
)
forwardmodel1 <- step(
  null_model1,
  scope = list(lower = ~1, upper = formula(model1)),
  direction = "forward"
)
## Start: AIC=170.1
## Y ~ gender + age + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s +
## MMP9_s + Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s +
## SERF1A_s + SCUBE2_s + MTDH_s
#Results show an AIC=170.11 after forward selection:
# Start: AIC=170.11
#
# Y ~ gender + age + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s +
# MMP9_s + Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s +
# SERF1A_s + SCUBE2_s + MTDH_s
# Coefficient table of the model returned by the forward step() call.
print(summary(forwardmodel1))
##
## Call:
## glm(formula = Y ~ gender + age + ancestry + GSTM3_s + RP5.860F19.3_s +
## BBC3_s + MMP9_s + Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s +
## SERF1A_s + SCUBE2_s + MTDH_s, family = "binomial", data = viral34_c[,
## c(4, 6, 7, 58:67)])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.776969 1.782134 0.44 0.663
## gendermale -0.706281 0.455474 -1.55 0.121
## age -0.000755 0.040859 -0.02 0.985
## ancestryB -0.661874 0.570617 -1.16 0.246
## ancestryC -1.374640 0.723780 -1.90 0.058 .
## GSTM3_s -0.180613 0.290497 -0.62 0.534
## RP5.860F19.3_s 0.111857 0.296742 0.38 0.706
## BBC3_s -0.207958 0.226570 -0.92 0.359
## MMP9_s 1.424142 0.332779 4.28 1.9e-05 ***
## Contig35251_RC_s 0.229409 0.243580 0.94 0.346
## Contig40831_RC_s 0.142125 0.243031 0.58 0.559
## ALDH4A1_s 1.601389 0.385158 4.16 3.2e-05 ***
## SERF1A_s 0.116305 0.265279 0.44 0.661
## SCUBE2_s -0.281441 0.251780 -1.12 0.264
## MTDH_s -0.009742 0.264942 -0.04 0.971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.05 on 139 degrees of freedom
## Residual deviance: 140.11 on 125 degrees of freedom
## AIC: 170.1
##
## Number of Fisher Scoring iterations: 5
#Performing backward selection:
# Backward elimination from the full model: step() removes one term at a time
# for as long as doing so lowers the AIC.
backwardmodel1 <- step(object = model1, direction = "backward")
## Start: AIC=170.1
## Y ~ gender + age + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s +
## MMP9_s + Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s +
## SERF1A_s + SCUBE2_s + MTDH_s
##
## Df Deviance AIC
## - age 1 140 168
## - MTDH_s 1 140 168
## - RP5.860F19.3_s 1 140 168
## - SERF1A_s 1 140 168
## - Contig40831_RC_s 1 140 168
## - GSTM3_s 1 140 168
## - BBC3_s 1 141 169
## - Contig35251_RC_s 1 141 169
## - SCUBE2_s 1 141 169
## <none> 140 170
## - gender 1 143 171
## - ancestry 2 145 171
## - ALDH4A1_s 1 163 191
## - MMP9_s 1 166 194
##
## Step: AIC=168.1
## Y ~ gender + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s + MMP9_s +
## Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s + SERF1A_s +
## SCUBE2_s + MTDH_s
##
## Df Deviance AIC
## - MTDH_s 1 140 166
## - RP5.860F19.3_s 1 140 166
## - SERF1A_s 1 140 166
## - Contig40831_RC_s 1 140 166
## - GSTM3_s 1 140 166
## - BBC3_s 1 141 167
## - Contig35251_RC_s 1 141 167
## - SCUBE2_s 1 141 167
## <none> 140 168
## - gender 1 143 169
## - ancestry 2 145 169
## - ALDH4A1_s 1 163 189
## - MMP9_s 1 166 192
##
## Step: AIC=166.1
## Y ~ gender + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s + MMP9_s +
## Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s + SERF1A_s +
## SCUBE2_s
##
## Df Deviance AIC
## - RP5.860F19.3_s 1 140 164
## - SERF1A_s 1 140 164
## - Contig40831_RC_s 1 140 164
## - GSTM3_s 1 140 164
## - BBC3_s 1 141 165
## - Contig35251_RC_s 1 141 165
## - SCUBE2_s 1 141 165
## <none> 140 166
## - gender 1 143 167
## - ancestry 2 145 167
## - ALDH4A1_s 1 165 189
## - MMP9_s 1 168 192
##
## Step: AIC=164.3
## Y ~ gender + ancestry + GSTM3_s + BBC3_s + MMP9_s + Contig35251_RC_s +
## Contig40831_RC_s + ALDH4A1_s + SERF1A_s + SCUBE2_s
##
## Df Deviance AIC
## - SERF1A_s 1 140 162
## - GSTM3_s 1 141 163
## - Contig40831_RC_s 1 141 163
## - BBC3_s 1 141 163
## - Contig35251_RC_s 1 141 163
## - SCUBE2_s 1 142 164
## <none> 140 164
## - gender 1 143 165
## - ancestry 2 145 165
## - ALDH4A1_s 1 168 190
## - MMP9_s 1 168 190
##
## Step: AIC=162.5
## Y ~ gender + ancestry + GSTM3_s + BBC3_s + MMP9_s + Contig35251_RC_s +
## Contig40831_RC_s + ALDH4A1_s + SCUBE2_s
##
## Df Deviance AIC
## - GSTM3_s 1 141 161
## - Contig40831_RC_s 1 141 161
## - BBC3_s 1 142 162
## - Contig35251_RC_s 1 142 162
## - SCUBE2_s 1 142 162
## <none> 140 162
## - gender 1 143 163
## - ancestry 2 145 163
## - ALDH4A1_s 1 168 188
## - MMP9_s 1 169 189
##
## Step: AIC=160.8
## Y ~ gender + ancestry + BBC3_s + MMP9_s + Contig35251_RC_s +
## Contig40831_RC_s + ALDH4A1_s + SCUBE2_s
##
## Df Deviance AIC
## - Contig40831_RC_s 1 141 159
## - BBC3_s 1 142 160
## - SCUBE2_s 1 142 160
## - Contig35251_RC_s 1 142 160
## <none> 141 161
## - gender 1 143 161
## - ancestry 2 146 162
## - MMP9_s 1 170 188
## - ALDH4A1_s 1 172 190
##
## Step: AIC=159.2
## Y ~ gender + ancestry + BBC3_s + MMP9_s + Contig35251_RC_s +
## ALDH4A1_s + SCUBE2_s
##
## Df Deviance AIC
## - BBC3_s 1 142 158
## - SCUBE2_s 1 142 158
## - Contig35251_RC_s 1 143 159
## <none> 141 159
## - gender 1 144 160
## - ancestry 2 146 160
## - ALDH4A1_s 1 173 189
## - MMP9_s 1 174 190
##
## Step: AIC=158.2
## Y ~ gender + ancestry + MMP9_s + Contig35251_RC_s + ALDH4A1_s +
## SCUBE2_s
##
## Df Deviance AIC
## - SCUBE2_s 1 144 158
## - Contig35251_RC_s 1 144 158
## <none> 142 158
## - gender 1 144 158
## - ancestry 2 147 159
## - ALDH4A1_s 1 173 187
## - MMP9_s 1 174 188
##
## Step: AIC=157.5
## Y ~ gender + ancestry + MMP9_s + Contig35251_RC_s + ALDH4A1_s
##
## Df Deviance AIC
## - gender 1 145 157
## - Contig35251_RC_s 1 146 158
## <none> 144 158
## - ancestry 2 148 158
## - ALDH4A1_s 1 173 185
## - MMP9_s 1 176 188
##
## Step: AIC=157.3
## Y ~ ancestry + MMP9_s + Contig35251_RC_s + ALDH4A1_s
##
## Df Deviance AIC
## - Contig35251_RC_s 1 147 157
## <none> 145 157
## - ancestry 2 150 158
## - ALDH4A1_s 1 175 185
## - MMP9_s 1 176 186
##
## Step: AIC=156.8
## Y ~ ancestry + MMP9_s + ALDH4A1_s
##
## Df Deviance AIC
## <none> 147 157
## - ancestry 2 152 158
## - ALDH4A1_s 1 175 183
## - MMP9_s 1 181 189
#Results show reduced AIC=156.84 and reduction of covariates to only ancestry, scaled gene expression MMP9_s, and scaled gene expression
#ALDH4A1_s (Aldehyde Dehydrogenase 4 family member A1) after backward elimination:
# Step: AIC=156.84
# Y ~ ancestry + MMP9_s + ALDH4A1_s
#
# Df Deviance AIC
# <none> 146.84 156.84
# - ancestry 2 151.94 157.94
# - ALDH4A1_s 1 175.43 183.43
# - MMP9_s 1 180.59 188.59
# Coefficient table of the model kept by backward elimination.
print(summary(backwardmodel1))
##
## Call:
## glm(formula = Y ~ ancestry + MMP9_s + ALDH4A1_s, family = "binomial",
## data = viral34_c[, c(4, 6, 7, 58:67)])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.351 0.243 1.45 0.148
## ancestryB -0.776 0.538 -1.44 0.149
## ancestryC -1.308 0.694 -1.89 0.059 .
## MMP9_s 1.375 0.288 4.78 1.8e-06 ***
## ALDH4A1_s 1.267 0.282 4.49 7.0e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.05 on 139 degrees of freedom
## Residual deviance: 146.84 on 135 degrees of freedom
## AIC: 156.8
##
## Number of Fisher Scoring iterations: 5
#Performing both selection:
#backward-forward (both) selection
# Stepwise search from the full model: at each step a term may be dropped or a
# previously dropped term re-added, whichever lowers the AIC most.
bothmodel1 <- step(object = model1, direction = "both")
## Start: AIC=170.1
## Y ~ gender + age + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s +
## MMP9_s + Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s +
## SERF1A_s + SCUBE2_s + MTDH_s
##
## Df Deviance AIC
## - age 1 140 168
## - MTDH_s 1 140 168
## - RP5.860F19.3_s 1 140 168
## - SERF1A_s 1 140 168
## - Contig40831_RC_s 1 140 168
## - GSTM3_s 1 140 168
## - BBC3_s 1 141 169
## - Contig35251_RC_s 1 141 169
## - SCUBE2_s 1 141 169
## <none> 140 170
## - gender 1 143 171
## - ancestry 2 145 171
## - ALDH4A1_s 1 163 191
## - MMP9_s 1 166 194
##
## Step: AIC=168.1
## Y ~ gender + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s + MMP9_s +
## Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s + SERF1A_s +
## SCUBE2_s + MTDH_s
##
## Df Deviance AIC
## - MTDH_s 1 140 166
## - RP5.860F19.3_s 1 140 166
## - SERF1A_s 1 140 166
## - Contig40831_RC_s 1 140 166
## - GSTM3_s 1 140 166
## - BBC3_s 1 141 167
## - Contig35251_RC_s 1 141 167
## - SCUBE2_s 1 141 167
## <none> 140 168
## - gender 1 143 169
## - ancestry 2 145 169
## + age 1 140 170
## - ALDH4A1_s 1 163 189
## - MMP9_s 1 166 192
##
## Step: AIC=166.1
## Y ~ gender + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s + MMP9_s +
## Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s + SERF1A_s +
## SCUBE2_s
##
## Df Deviance AIC
## - RP5.860F19.3_s 1 140 164
## - SERF1A_s 1 140 164
## - Contig40831_RC_s 1 140 164
## - GSTM3_s 1 140 164
## - BBC3_s 1 141 165
## - Contig35251_RC_s 1 141 165
## - SCUBE2_s 1 141 165
## <none> 140 166
## - gender 1 143 167
## - ancestry 2 145 167
## + MTDH_s 1 140 168
## + age 1 140 168
## - ALDH4A1_s 1 165 189
## - MMP9_s 1 168 192
##
## Step: AIC=164.3
## Y ~ gender + ancestry + GSTM3_s + BBC3_s + MMP9_s + Contig35251_RC_s +
## Contig40831_RC_s + ALDH4A1_s + SERF1A_s + SCUBE2_s
##
## Df Deviance AIC
## - SERF1A_s 1 140 162
## - GSTM3_s 1 141 163
## - Contig40831_RC_s 1 141 163
## - BBC3_s 1 141 163
## - Contig35251_RC_s 1 141 163
## - SCUBE2_s 1 142 164
## <none> 140 164
## - gender 1 143 165
## - ancestry 2 145 165
## + RP5.860F19.3_s 1 140 166
## + MTDH_s 1 140 166
## + age 1 140 166
## - ALDH4A1_s 1 168 190
## - MMP9_s 1 168 190
##
## Step: AIC=162.5
## Y ~ gender + ancestry + GSTM3_s + BBC3_s + MMP9_s + Contig35251_RC_s +
## Contig40831_RC_s + ALDH4A1_s + SCUBE2_s
##
## Df Deviance AIC
## - GSTM3_s 1 141 161
## - Contig40831_RC_s 1 141 161
## - BBC3_s 1 142 162
## - Contig35251_RC_s 1 142 162
## - SCUBE2_s 1 142 162
## <none> 140 162
## - gender 1 143 163
## - ancestry 2 145 163
## + SERF1A_s 1 140 164
## + RP5.860F19.3_s 1 140 164
## + age 1 140 164
## + MTDH_s 1 140 164
## - ALDH4A1_s 1 168 188
## - MMP9_s 1 169 189
##
## Step: AIC=160.8
## Y ~ gender + ancestry + BBC3_s + MMP9_s + Contig35251_RC_s +
## Contig40831_RC_s + ALDH4A1_s + SCUBE2_s
##
## Df Deviance AIC
## - Contig40831_RC_s 1 141 159
## - BBC3_s 1 142 160
## - SCUBE2_s 1 142 160
## - Contig35251_RC_s 1 142 160
## <none> 141 161
## - gender 1 143 161
## - ancestry 2 146 162
## + GSTM3_s 1 140 162
## + SERF1A_s 1 141 163
## + RP5.860F19.3_s 1 141 163
## + age 1 141 163
## + MTDH_s 1 141 163
## - MMP9_s 1 170 188
## - ALDH4A1_s 1 172 190
##
## Step: AIC=159.2
## Y ~ gender + ancestry + BBC3_s + MMP9_s + Contig35251_RC_s +
## ALDH4A1_s + SCUBE2_s
##
## Df Deviance AIC
## - BBC3_s 1 142 158
## - SCUBE2_s 1 142 158
## - Contig35251_RC_s 1 143 159
## <none> 141 159
## - gender 1 144 160
## - ancestry 2 146 160
## + Contig40831_RC_s 1 141 161
## + GSTM3_s 1 141 161
## + SERF1A_s 1 141 161
## + age 1 141 161
## + RP5.860F19.3_s 1 141 161
## + MTDH_s 1 141 161
## - ALDH4A1_s 1 173 189
## - MMP9_s 1 174 190
##
## Step: AIC=158.2
## Y ~ gender + ancestry + MMP9_s + Contig35251_RC_s + ALDH4A1_s +
## SCUBE2_s
##
## Df Deviance AIC
## - SCUBE2_s 1 144 158
## - Contig35251_RC_s 1 144 158
## <none> 142 158
## - gender 1 144 158
## - ancestry 2 147 159
## + BBC3_s 1 141 159
## + Contig40831_RC_s 1 142 160
## + GSTM3_s 1 142 160
## + SERF1A_s 1 142 160
## + RP5.860F19.3_s 1 142 160
## + age 1 142 160
## + MTDH_s 1 142 160
## - ALDH4A1_s 1 173 187
## - MMP9_s 1 174 188
##
## Step: AIC=157.5
## Y ~ gender + ancestry + MMP9_s + Contig35251_RC_s + ALDH4A1_s
##
## Df Deviance AIC
## - gender 1 145 157
## - Contig35251_RC_s 1 146 158
## <none> 144 158
## + SCUBE2_s 1 142 158
## - ancestry 2 148 158
## + BBC3_s 1 142 158
## + Contig40831_RC_s 1 143 159
## + SERF1A_s 1 143 159
## + MTDH_s 1 143 159
## + GSTM3_s 1 143 159
## + RP5.860F19.3_s 1 144 160
## + age 1 144 160
## - ALDH4A1_s 1 173 185
## - MMP9_s 1 176 188
##
## Step: AIC=157.3
## Y ~ ancestry + MMP9_s + Contig35251_RC_s + ALDH4A1_s
##
## Df Deviance AIC
## - Contig35251_RC_s 1 147 157
## <none> 145 157
## + gender 1 144 158
## - ancestry 2 150 158
## + BBC3_s 1 144 158
## + SCUBE2_s 1 144 158
## + Contig40831_RC_s 1 145 159
## + SERF1A_s 1 145 159
## + age 1 145 159
## + MTDH_s 1 145 159
## + GSTM3_s 1 145 159
## + RP5.860F19.3_s 1 145 159
## - ALDH4A1_s 1 175 185
## - MMP9_s 1 176 186
##
## Step: AIC=156.8
## Y ~ ancestry + MMP9_s + ALDH4A1_s
##
## Df Deviance AIC
## <none> 147 157
## + Contig35251_RC_s 1 145 157
## + gender 1 146 158
## + BBC3_s 1 146 158
## + SCUBE2_s 1 146 158
## - ancestry 2 152 158
## + Contig40831_RC_s 1 146 158
## + SERF1A_s 1 146 158
## + MTDH_s 1 147 159
## + GSTM3_s 1 147 159
## + age 1 147 159
## + RP5.860F19.3_s 1 147 159
## - ALDH4A1_s 1 175 183
## - MMP9_s 1 181 189
#Results show again reduced AIC=156.84
# Step: AIC=156.84
# Y ~ ancestry + MMP9_s + ALDH4A1_s
#
# Df Deviance AIC
# <none> 146.84 156.84
# + Contig35251_RC_s 1 145.32 157.32
# + gender 1 145.51 157.51
# + BBC3_s 1 145.62 157.62
# + SCUBE2_s 1 145.68 157.68
# - ancestry 2 151.94 157.94
# + Contig40831_RC_s 1 146.03 158.03
# + SERF1A_s 1 146.45 158.45
# + MTDH_s 1 146.70 158.70
# + GSTM3_s 1 146.72 158.72
# + age 1 146.79 158.79
# + RP5.860F19.3_s 1 146.81 158.81
# - ALDH4A1_s 1 175.43 183.43
# - MMP9_s 1 180.59 188.59
#Also, summary: # best backward-forward model
# Coefficient table of the model kept by the stepwise ("both") search.
print(summary(bothmodel1))
##
## Call:
## glm(formula = Y ~ ancestry + MMP9_s + ALDH4A1_s, family = "binomial",
## data = viral34_c[, c(4, 6, 7, 58:67)])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.351 0.243 1.45 0.148
## ancestryB -0.776 0.538 -1.44 0.149
## ancestryC -1.308 0.694 -1.89 0.059 .
## MMP9_s 1.375 0.288 4.78 1.8e-06 ***
## ALDH4A1_s 1.267 0.282 4.49 7.0e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.05 on 139 degrees of freedom
## Residual deviance: 146.84 on 135 degrees of freedom
## AIC: 156.8
##
## Number of Fisher Scoring iterations: 5
#The summary of the best.model provides an individual Wald z-test for each coefficient. For a logistic glm, summary() prints no global F test;
#the overall relationship is judged instead from the drop from null deviance (194.05 on 139 df) to residual deviance (146.84 on 135 df).
#With fewer covariates, the retained variables show larger z-values and lower p-values, i.e. clearer influence on the response variable (type of infection).
#Results: Neither ancestry contrast is significant at the 0.05 level (ancestryC is borderline, p=0.059), whereas the MMP9_s and ALDH4A1_s scaled
#gene expression levels are significant (individual z-test p-values<0.05). The deviance reduction of 47.21 on 4 df is strongly significant.
#INTERESTINGLY, MMP9_s and ALDH4A1_s were among the 5 genes whose non-scaled expression levels
#are significantly different among infection type:
#As specified by problem statement, the selected "bothmodel1" model is designated as “best.model”.
# Keep the stepwise ("both") result under the name required by the assignment.
best.model <- bothmodel1
# Confirm that the alias refers to the reduced model:
print(summary(best.model))
##
## Call:
## glm(formula = Y ~ ancestry + MMP9_s + ALDH4A1_s, family = "binomial",
## data = viral34_c[, c(4, 6, 7, 58:67)])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.351 0.243 1.45 0.148
## ancestryB -0.776 0.538 -1.44 0.149
## ancestryC -1.308 0.694 -1.89 0.059 .
## MMP9_s 1.375 0.288 4.78 1.8e-06 ***
## ALDH4A1_s 1.267 0.282 4.49 7.0e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.05 on 139 degrees of freedom
## Residual deviance: 146.84 on 135 degrees of freedom
## AIC: 156.8
##
## Number of Fisher Scoring iterations: 5
##Prediction of Y (infection type) for new values of X using best.model
#Specifying the values of the predictor in a dataframe and using function predict():
# predict() matches the columns of newdata to the model terms by name, so the
# new observation needs one named column per predictor of best.model
# (ancestry, MMP9_s, ALDH4A1_s). The previous data.frame() call was given `==`
# comparisons, which produced logical columns with generated names (one row
# per sample) that predict() could not use.
# NOTE(review): 3000 and 4000 are kept from the original; for the scaled *_s
# variables these are probably far outside the observed range -- confirm.
xnew <- data.frame(
  ALDH4A1_s = 3000,
  MMP9_s = 4000,
  ancestry = "B"
)
predict(best.model, newdata = xnew) # linear predictor (log-odds) for xnew
#Interpret the best.model: Beyond having reduced # variables and AIC compared to initial full models,
#I analyzed the best.model and then compare the initial full model as follows:
#The confidence interval for the coefficients of the logistic regression are obtained for best.model with confint():
# 95% confidence limits for the coefficients of the reduced model.
confint(best.model, level = 0.95)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -0.1190 0.838301
## ancestryB -1.8670 0.260295
## ancestryC -2.7535 0.005862
## MMP9_s 0.8520 1.986676
## ALDH4A1_s 0.7528 1.863358
#Results:
# 2.5 % 97.5 %
# (Intercept) -0.1190425 0.838300536
# ancestryB -1.8670032 0.260294895
# ancestryC -2.7535011 0.005861916
# MMP9_s 0.8520343 1.986676277
# ALDH4A1_s 0.7528095 1.863357741
#For initial full model1:
# 95% confidence limits for the coefficients of the full model.
confint(model1, level = 0.95)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -2.72040 4.333878
## gendermale -1.62281 0.173956
## age -0.08187 0.079777
## ancestryB -1.82537 0.433162
## ancestryC -2.88519 -0.006378
## GSTM3_s -0.76966 0.382296
## RP5.860F19.3_s -0.49357 0.684602
## BBC3_s -0.66580 0.230649
## MMP9_s 0.81907 2.134211
## Contig35251_RC_s -0.24862 0.714392
## Contig40831_RC_s -0.33438 0.627067
## ALDH4A1_s 0.89709 2.417264
## SERF1A_s -0.41215 0.638408
## SCUBE2_s -0.79154 0.205422
## MTDH_s -0.52568 0.520991
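#Results:
#The confidence intervals are as follows (NOTE: this recorded table lists FGF18_l instead of age, i.e. it appears to come from model1a rather than model1):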
# 2.5 % 97.5 %
# (Intercept) -16.8783763 4.176943697
# gendermale -1.7366550 0.084578365
# FGF18_l -3.1467453 16.292373530
# ancestryB -1.8674360 0.413323412
# ancestryC -2.9066239 0.009822317
# GSTM3_s -0.8763169 0.312640234
# RP5.860F19.3_s -0.5235649 0.682053857
# BBC3_s -0.6109021 0.315461511
# MMP9_s 0.8316892 2.158612070
# Contig35251_RC_s -0.2238803 0.730512696
# Contig40831_RC_s -0.2913917 0.672645907
# ALDH4A1_s 0.6376632 2.262095823
# SERF1A_s -0.3904859 0.623907157
# SCUBE2_s -0.8499912 0.159190457
# MTDH_s -0.5289403 0.518853142
#The odds-ratios for initial full model1 are obtained by exponentiating the output of the logistic regression as follows:
# Odds ratios of the full model: coefficients moved from the log-odds scale.
print(exp(coefficients(model1)))
## (Intercept) gendermale age ancestryB
## 2.1749 0.4935 0.9992 0.5159
## ancestryC GSTM3_s RP5.860F19.3_s BBC3_s
## 0.2529 0.8348 1.1184 0.8122
## MMP9_s Contig35251_RC_s Contig40831_RC_s ALDH4A1_s
## 4.1543 1.2579 1.1527 4.9599
## SERF1A_s SCUBE2_s MTDH_s
## 1.1233 0.7547 0.9903
#Results:
# (Intercept) gendermale age ancestryB ancestryC GSTM3_s RP5.860F19.3_s BBC3_s MMP9_s
# 2.1748696 0.4934761 0.9992451 0.5158839 0.2529307 0.8347582 1.1183530 0.8122408 4.1542930
# Contig35251_RC_s Contig40831_RC_s ALDH4A1_s SERF1A_s SCUBE2_s MTDH_s
# 1.2578561 1.1527205 4.9599179 1.1233387 0.7546956 0.9903049
#The odds-ratios for new best.model are obtained by exponentiating the output of the logistic regression as follows:
# Odds ratios of the reduced model: coefficients moved from the log-odds scale.
print(exp(coefficients(best.model)))
## (Intercept) ancestryB ancestryC MMP9_s ALDH4A1_s
## 1.4207 0.4604 0.2703 3.9539 3.5486
#Results:
# (Intercept) ancestryB ancestryC MMP9_s ALDH4A1_s
# 1.4206505 0.4603976 0.2702748 3.9539104 3.5486461
#The confidence intervals of the odds-ratios for initial model1 are obtained as follows:
# 95% confidence limits of the full-model odds ratios (exponentiated limits).
print(exp(confint(model1, level = 0.95)))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.06585 76.2394
## gendermale 0.19734 1.1900
## age 0.92140 1.0830
## ancestryB 0.16116 1.5421
## ancestryC 0.05584 0.9936
## GSTM3_s 0.46317 1.4656
## RP5.860F19.3_s 0.61044 1.9830
## BBC3_s 0.51386 1.2594
## MMP9_s 2.26839 8.4504
## Contig35251_RC_s 0.77988 2.0429
## Contig40831_RC_s 0.71578 1.8721
## ALDH4A1_s 2.45245 11.2151
## SERF1A_s 0.66223 1.8935
## SCUBE2_s 0.45315 1.2280
## MTDH_s 0.59115 1.6837
#Results:
# 2.5 % 97.5 %
# (Intercept) 0.06584851 76.239374
# gendermale 0.19734262 1.190003
# age 0.92139629 1.083045
# ancestryB 0.16115836 1.542126
# ancestryC 0.05584396 0.993642
# GSTM3_s 0.46316860 1.465646
# RP5.860F19.3_s 0.61044232 1.982982
# BBC3_s 0.51386096 1.259417
# MMP9_s 2.26839190 8.450376
# Contig35251_RC_s 0.77987874 2.042943
# Contig40831_RC_s 0.71578394 1.872111
# ALDH4A1_s 2.45244687 11.215133
# SERF1A_s 0.66222621 1.893463
# SCUBE2_s 0.45314697 1.228043
# MTDH_s 0.59115500 1.683696
# >
#The confidence intervals of the odds-ratios for new best.model are obtained as follows:
# 95% confidence limits of the reduced-model odds ratios (exponentiated limits).
print(exp(confint(best.model, level = 0.95)))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.8878 2.312
## ancestryB 0.1546 1.297
## ancestryC 0.0637 1.006
## MMP9_s 2.3444 7.291
## ALDH4A1_s 2.1230 6.445
#Results:
# 2.5 % 97.5 %
# (Intercept) 0.88777010 2.312434
# ancestryB 0.15458623 1.297313
# ancestryC 0.06370444 1.005879
# MMP9_s 2.34441120 7.291259
# ALDH4A1_s 2.12295614 6.445342
#The goodness-of-fit of the intial model1 is preliminarily checked via its deviance:
# Residual deviance of the full model.
print(deviance(model1))
## [1] 140.1
#Results:
#[1] 140.1082
# Same statistic for the reduced best.model, followed by its AIC and BIC.
print(deviance(best.model))
## [1] 146.8
#Results:
#[1]146.8426
print(AIC(best.model))
## [1] 156.8
#Result:[1] 156.8426
print(BIC(best.model))
## [1] 171.6
#Result:[1] 171.5508
#The best.model deviance was higher. However, the 2 models cannot be compared by this criterion alone as they contain different numbers of variables. Therefore,
#they are instead compared formally via an analysis-of-deviance (ANOVA) table as follows:
# Likelihood-ratio (chi-square) comparison of the two nested models. The test
# is requested explicitly: the hand-recorded "Results" table below has no
# p-value column while the knitted output does, so the default evidently
# differs between the R versions used. Naming the test makes the p-value
# appear regardless of version.
anova(model1, best.model, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Y ~ gender + age + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s +
## MMP9_s + Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s +
## SERF1A_s + SCUBE2_s + MTDH_s
## Model 2: Y ~ ancestry + MMP9_s + ALDH4A1_s
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 125 140
## 2 135 147 -10 -6.73 0.75
#Results:
# Analysis of Deviance Table
#
# Model 1: Y ~ gender + age + ancestry + GSTM3_s + RP5.860F19.3_s + BBC3_s +
# MMP9_s + Contig35251_RC_s + Contig40831_RC_s + ALDH4A1_s +
# SERF1A_s + SCUBE2_s + MTDH_s
# Model 2: Y ~ ancestry + MMP9_s + ALDH4A1_s
# Resid. Df Resid. Dev Df Deviance
# 1 125 140.11
# 2 135 146.84 -10 -6.7344
#Two covariates interact when the effect of the first covariate on the dependent variable depends on the value of the second covariate.
#To determine INTERACTION and if the two CONTINUOUS covariates MMP9_s and ALDH4A1_s interacted and if best.model needs to be adjusted to account for interaction:
# Scatter plot of the two scaled expression levels, coloured by infection type.
# The grouping variable is converted to integer codes 1, 2, ... before being
# used as a colour: a raw 0/1 numeric vector would draw the 0 group in colour 0
# (the background colour), i.e. invisibly.
# NOTE(review): assumes viral34_c$infection has no unused factor levels, so the
# codes match the ones a factor column would have produced -- confirm.
plot(
  viral34_c$ALDH4A1_s, viral34_c$MMP9_s,
  col = as.integer(factor(viral34_c$infection))
)
#abline(lm(viral34_c$MMP9_s[viral34_c$infection==0]~viral34_c$ALDH4A1_s[infection==0]), col=1)
#abline(lm(viral34_c$MMP9_s[viral34_c$infection==1]~viral34_c$ALDH4A1_s[infection==1]), col=2)
#Based on the plot, the points are uniformly distributed and there appears to be an interaction that needs to be accounted for.
#I then assessed the correlation between the two genes (no least-squares fit, residuals or R2 are computed here; only the correlation coefficient).
#Because the gene expression values of MMP9_s and ALDH4A1_s are non-normally distributed,
#the rank-based Spearman correlation is used for these 2 genes:
# Rank-based (Spearman) correlation between the two scaled expression levels.
cor(
  x = viral34_c$MMP9_s,
  y = viral34_c$ALDH4A1_s,
  method = "spearman"
)
## [1] -0.3097
#Results: [1] -0.3096543
#A new bestest.model was proposed to include the interaction between the two scaled gene expression levels of MMP9_s and ALDH4A1_s:
# Reduced model plus the MMP9_s x ALDH4A1_s interaction. The interaction has to
# be written in the formula (`*` expands to both main effects and their
# product). The previous column selection c(7, 11, 14, (11 * 14)) evaluated
# 11 * 14 to 154 and therefore just added column 154 (ZNF533_l per the recorded
# summary) next to the unscaled MMP9 and ALDH4A1 columns -- no interaction term
# was ever fitted.
# NOTE(review): the summary recorded below comes from that earlier fit and must
# be regenerated.
bestest.model <- glm(
  formula = Y ~ ancestry + MMP9_s * ALDH4A1_s,
  family = "binomial",
  data = viral34_c[, c("ancestry", "MMP9_s", "ALDH4A1_s")]
)
summary(bestest.model)
##
## Call:
## glm(formula = Y ~ ., family = "binomial", data = viral34_c[,
## c(7, 11, 14, (11 * 14))])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.830 2.607 2.24 0.025 *
## ancestryB -0.754 0.543 -1.39 0.165
## ancestryC -1.439 0.710 -2.03 0.043 *
## MMP9 6.941 1.438 4.83 1.4e-06 ***
## ALDH4A1 6.634 1.453 4.56 5.0e-06 ***
## ZNF533_l -4.658 2.369 -1.97 0.049 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.05 on 139 degrees of freedom
## Residual deviance: 142.77 on 134 degrees of freedom
## AIC: 154.8
##
## Number of Fisher Scoring iterations: 5
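#This resulted in an even lower AIC=154.77. Note, however, that the covariates in the summary above are ancestry, MMP9, ALDH4A1 and ZNF533_l:
#the fit contains no MMP9_s:ALDH4A1_s term, so the interaction between the two scaled gene expression levels of MMP9_s and ALDH4A1_s still needs to be included: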
#QUESTION 14: Analyze the classification ability of “best.model” (ROC curve and AUC) according to the following schemes: a. Apparent validation of “best.model” using the same data that was used for model building. b. Cross-validation with k = 5 for “best.model”. c. Though the cv-classification is better than the apparent classification, it still is over-estimating the real classification of “best.model”. Discuss why and how to obtain a more accurate classification estimation (slides 262:264).
#(PART a)Apparent validation of “best.model” using the same data that was used for model building.
#BACKGROUND: Since the fitted best.model logistic function is increasing, we get the rank of individuals via the linear predictor RS (risk score).
#Larger values of RS are associated to higher risk of viral infection (Y=1) compared to bacterial infection (Y=0).
#Individuals can be classified into different risk categories according to this risk score based on a threshold value.
# Classification accuracy for the best.model is depicted by Sensitivity and Specificity:
# Sensitivity is the proportion of positives that are correctly predicted.
# Specificity is the proportion of negatives that are correctly predicted
#Classification accuracy depends on the threshold considered for the predicted probabilities or the linear predictions.
#The method of dividing data into training and test sets to estimate the classifier performance is an important concern
#when validating our best.model and assessing the prediction or classification accuracy of statistical models in future samples.
#When building a machine learning model using some data, data is often split into training and validation/test sets.
#The training set is used to train the model, and the validation/test set is used to validate it on data it has never seen before.
#This can be performed in a single train/test split of the samples (The classic approach is to do a simple 80%-20% split) or other means.
#Specifically, this can be done via apparent validation, internal validation, and external validation.
#Apparent validation measures the predictive accuracy of the model on the same sample used for building the model. “Apparent” classification accuracy
#is where accuracy is measured on the same data that was used to build the models (train data).
#“Apparent” classification accuracy overestimates real prediction classification accuracy of the best.model.
#Internal validation (which includes bootstrap, cross-validation, and split-sample validation) splits the available data into training
#and test samples. External validation measures the accuracy of the model in an independent sample.
#APPARENT VALIDATION:
library(glmnet)
library(ROCR)
##
## Attaching package: 'ROCR'
## The following object is masked from 'package:CMA':
##
## prediction
#The Risk Score is obtained as sum of linear predictors as follows:
#lp<-best.model$linear.predictors
#lp<-best.model$linear.predictors
#PLEASE NOTE, I UNFORTUNATELY HAD THIS ALL WORKING BEFORE WITH model1a (NOT the bestest.model from before) as I worked on PROBLEM 14 BEFORE PROBLEM 13. Therefore, I left
#the code as it was so as to not break any more of what I had graphing out OK and operating smoothly until the last hour. Apologies for the mess here:
# Risk score = linear predictor (log-odds scale) stored on the fitted glm.
# NOTE(review): this reads model1a, not best.model, although the exercise asks
# for best.model -- confirm which model the validation should use.
lp <- model1a[["linear.predictors"]]
#Exploring the apparent classification accuracy of the above best.model (ROC curve and AUC)
#The ROC curve provides a graphical representation of the classification accuracy of a model for all possible thresholds.
#The AUC, area under the ROC curve, is a numerical summary of the ROC curve. AUC near 1 corresponds to high classification accuracy while
#AUC near 0.5 corresponds to very poor classification accuracy
#Generating ROC Curve, where TP rate (sensitivity) is plotted against FP rate (1-specificity):
# ROC curve: true-positive rate against false-positive rate of the risk score,
# with the diagonal drawn as the no-discrimination reference line.
pred <- ROCR::prediction(predictions = lp, labels = Y)
perf <- ROCR::performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)
abline(a = 0, b = 1)
title(main = "ROC curve")
#Area Under the ROC curve (AUC) provides a measure of discrimination of the Risk Score among viral-infected (Y=1) and
#bacterial-infected (Y=0) individuals: AUC = P[RS(Y= 1) > RS(Y= 0)]. Generating AUC:
# Area under the ROC curve, read from the first entry of the y.values slot.
auc <- performance(pred, measure = "auc")@y.values[[1]]
print(auc)
## [1] 0.84
#[1] 0.8399673
#In this apparent classification, we obtained an ROC curve ABOVE the diagonal and an AUC GREATER than 0.5.
#Therefore, we DID NOT change the sign of the linear predictor (lp):
#(PART b) Cross-validation with k = 5 for “best.model”:
#BACKGROUND: Some citing https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10346713/ and neptune.ai
#Cross-validation is a re-sampling method that uses different portions of the data to test and train a model on different iterations.
#In cross-validation, more than one split is done(e.g. K number of splits each called a folds).There are many strategies to create these folds with.
#The goal of cross-validation is to test the model's ability to predict new data that was not used in estimating it, in order to flag problems
#like overfitting or selection bias[10] and to give an insight on how the model will generalize to an independent dataset (i.e., an unknown dataset,
#for instance from a real problem). The most common CV technique is k-fold CV, where the full dataset is randomly partitioned into k subsets of samples, and one of those subsets is
#retained for testing the classifier, while the remaining k-1 subsets comprise the training set. This process is repeated k times until all subsets
#(and thus, all individual samples) have been used for testing the classifier exactly once. The overall classifier performance is then estimated as the
#average of the resulting k classification accuracies from each step of the CV. Because there can be significant variation in the accuracy obtained with
#different train/test splits, this method yields a more generalizable estimate of classifier performance than taking just a single split
#The cross-validation (AI/ML) algorithm is as follows:
#1.Divide the dataset into two parts: one for training, other for testing
#2.Train the model on the training set
#3.Validate the model on the test set
#4.Repeat 1-3 steps a couple of times. This number depends on the CV method
#k-Fold cross-validation minimizes the disadvantages of the hold-out method. k-Fold introduces a new way of splitting the dataset
#which helps to overcome the “test only once bottleneck”.It is generally better to use k-Fold technique instead of hold-out.
#By direct comparison, k-Fold gives a more stable and trustworthy result since training and testing is performed on several different parts of the
#dataset. The overall score can be made even more robust by increasing the number of folds to test the model on many different sub-datasets.
#Certain scenarios in which cross-validation becomes necessary include limited dataset,dependent data points,cons of single metric, and hyperparameter tuning:
#Still, k-Fold method has a disadvantage whereby increasing k results in training more models and the training process might be really expensive and time-consuming.
#Performing cross-validation to obtain the internal classification accuracy of the above best.model (ROC curve and AUC):
# Cross-validation setup: K folds over the n individuals in viral34_c.
K <- 5
n <- nrow(viral34_c) # number of individuals=140
# Random assignment of each individual into one of the K folds.
# cut() splits the indices 1..n into K equal-width groups and sample() shuffles
# the resulting fold labels. The seed is fixed so that the fold assignment (and
# therefore the cross-validated AUC) is reproducible between runs.
set.seed(1234)
fold <- sample(as.numeric(cut(seq_len(n), breaks = K)))
# Vector of cross-validated predictions; it is filled fold by fold in the CV loop
# below (currently commented out).
pred <- NULL
#NEED TO COMMENT OUT FOLLOWING CODE AS I AM NOW GETTING ERROR AFTER CODE EXECUTION:
#for(i in 1:K){
# Test indices
# indTest <- which(fold==i)
# Train indices
#indTrain <- which(fold!=i)
# model.i<-glm(Y[indTrain]~., data=viral34_c[indTrain,c(4,6,158,58:67)],family="binomial")
# Adjust the model with training data
# Predicts test data at step i. PLEASE NOTE: I USED THE MODEL1A parameter columns of the dataframe instead of the superior and later-derived bestest.model
# pred.i <- predict(model.i, newdata=viral34_c[indTest,c(4,6,158,58:67)])
#pred[indTest] <- pred.i
# Store predicted values for test data at step i
#}
#Error in `[.data.frame`(viral34_c, indTrain, c(4, 6, 158, 58:67)) :
# undefined columns selected
#This code worked before, but now I get the above new error, so I need to comment the code out for execution:
#Generating ROC Curve:
#pred <- prediction(pred, Y)
#perf <- performance(pred, "tpr", "fpr" )
#plot(perf)
#abline(a=0, b= 1)
#title("ROC curve")
#Generating AUC
#(auc<-slot(performance(pred,"auc"), "y.values")[[1]])
#[1] 0.749949
#Evidently, the AUC is lower after cross-Validation.
#(PART c)Though the cv-classification is better than the apparent classification, it still is over-estimating the real classification
#of "best-model".Discuss why and how to obtain a more accurate classification estimation (slides 262:264):
#Based on slides #262-264, an incorrect scheme for validation that results in overfitting can be improved upon by different approaches:
#Among these are to perform variable selection on the training data set (as opposed to complete data set) (Simultaneous model selection and validation) and to repeat the entire process iteratively
#from 1 to B. For each variable, there is a percentage of times selected and there is a mean classification accuracy among iterated B models.
#Alternative bootstrap-based validation via resampling with replacement can be performed also.
#From a dataset with N samples, No examples are randomly selected with replacement and used for training.
#Those not selected for training are used for testing, all repeated for a specified number of folds K.
#Here, the true error is estimated as the average error rate on test data.
# The following example function performs B bootstrap iterations. At each iteration a new bootstrap sample is obtained (bsample)
# and the mean of the bootstrap sample is stored in a vector (mean.vector) of length B that contains the means of the B different bootstrap samples:
#
# bootstrap <- function(data, B){
# mean.vector <- NULL
# for(b in 1:B) {
# bsample <- sample(data,length(data),replace=T)
# mean.vector <- c(mean.vector,mean(bsample))
# }
# return(mean.vector)
# }
#
# Citing some from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10346713/ and wikipedia
# Again, k-Fold method has a disadvantage whereby increasing k results in training more models and the training process might be really expensive and time-consuming.While k-fold cross-validation is a very commonly used technique for evaluating machine learning algorithms offline, it can present issues with some types
# of data when the samples within classes are collected in close proximity in time, without randomization with the other class(es).
# For time-series data, the process of randomly dividing all samples into k partitions results in the training and test sets containing samples from the
# same class that are highly correlated due to their proximity in time. This violates the assumption of independence that is critical to the validity of
# k-fold cross-validation. The result is that the classifier could pick up differences between the classes that are actually just related to this temporal
# correlation of some samples, rather than to any true class-related difference.An alternative approach that mitigates this issue in experiments with this
# trial structure and associated autocorrelation of samples is to perform block-wise (or trial-wise) cross-validation. In each step of block-wise CV,
# the trials are first randomly divided into a number of subsets b. The samples derived from the trials in one subset are held back for testing, while
# the samples from the remaining trials are used to train the classifier. This is repeated b times until all trials have appeared in the test set exactly once.
# The overall classifier performance is estimated as the average of the b resulting accuracies from each step. This partitioning strategy ensures that all
# samples from a single trial always remain together in either the training or test set, and, thus, temporal correlations will not influence the results
# as described above for k-fold CV.If performance is described by a single summary statistic, it is possible that the approach described by Politis and Romano as a stationary bootstrap
#can also help overcome this, where the statistic of the bootstrap needs to accept an interval of the time series and return the summary statistic on it.
#The call to the stationary bootstrap needs to specify an appropriate mean interval length.
# QUESTION 15: Consider a regression model for the kind of infection as a function of all 50 genes (scaled) and adjusted by age. Perform variable selection with LASSO and interpret the results. [Adjusted by AGE or a function of AGE?]
# BACKGROUND: A generalized regression model is to be fitted again because the response variable is a categorical variable with binomial
# probabilistic outcome (Y=0/Y=1) where the probability is bound by an interval of [0,1], necessitating a logit transformation.
# Because the number of covariates ( 50 scaled gene expression levels) is very large,
# LASSO will be used to perform penalized regression for variable selection. LASSO estimates for the coefficients are chosen to minimize
# the residual sum of squares, as the OLS approach, but with the additional restriction that the sum of the
# coefficients (in absolute value) should not exceed a specified value.This is equivalent to minimizing usual least squares criterion
# with a penalty for large coefficient estimates determined by (lambda), known as the shrinkage parameter.
# If the lambda parameter = 0 the lasso is the same as OLS with all variables included in the
# model; as lambda increases, the restriction on the summed fitted terms is stronger, implying that some
# of the coefficients are shrunk to zero and fewer variables are included in the model.
# The Function glmnet() performs generalized linear model via penalized maximum likelihood.
# With alpha=1 the method performs LASSO penalization, for alpha=0 ridge penalization, and for alpha between 0 and 1, elastic-net penalization.
# The function provides the output for a grid of penalization parameters
library(glmnet)
# Predictors: columns 58 to 107 of viral34_ca (the scaled gene expression levels).
X <- as.matrix(viral34_ca[, 58:107])
# Response: first column of viral34_ca (infection, already converted to a factor earlier).
Y <- viral34_ca[, 1]
# NOTE(review): the question asks for adjustment by age, but only the gene columns
# enter X here -- confirm whether an age column should be added.
# Penalised logistic regression over a grid of lambda values; alpha = 1 selects the LASSO penalty.
mlasso <- glmnet(x = X, y = Y, family = "binomial", alpha = 1, standardize = TRUE)
# Plot of the LASSO path; the numbers along the top of the plot give the number of
# variables included in the model.
plot(mlasso)
# Before cross-validation, the coefficients of the logistic model are extracted
# for one specific value of lambda (here lambda = 13):
coef(mlasso, s = 13)
## 51 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 0.02857
## GSTM3_s .
## RP5.860F19.3_s .
## BBC3_s .
## MMP9_s .
## Contig35251_RC_s .
## Contig40831_RC_s .
## ALDH4A1_s .
## SERF1A_s .
## SCUBE2_s .
## MTDH_s .
## DCK_s .
## FLT1_s .
## PECI.1_s .
## QSCN6L1_s .
## DIAPH3_s .
## SLC2A3_s .
## GPR180_s .
## RTN4RL1_s .
## Contig32125_RC_s .
## STK32B_s .
## EXT1_s .
## COL4A2_s .
## PECI_s .
## GNAZ_s .
## AYTL2_s .
## Contig63649_RC_s .
## RAB6B_s .
## AA555029_RC_s .
## GPR126_s .
## ECT2_s .
## NUSAP1_s .
## GMPS_s .
## UCHL5_s .
## ORC6L_s .
## TSPYL5_s .
## MELK_s .
## RUNDC1_s .
## DIAPH3.1_s .
## C16orf61_s .
## TGFB3_s .
## FGF18_s .
## CDC42BPA_s .
## DTL_s .
## WISP1_s .
## DIAPH3.2_s .
## OXCT1_s .
## ZNF533_s .
## RFC4_s .
## KNTC2_s .
## FBXO31_s .
# Cross-validated LASSO is now used to estimate the optimal value of lambda.
# cv.glmnet() reports two candidate values: lambda.min, the lambda with the minimum
# cross-validated error, and lambda.1se, a lambda whose error is within 1 s.e. of that minimum.
set.seed(1234)
cv.lasso <- cv.glmnet(x = X, y = Y, family = "binomial", standardize = TRUE)
plot(cv.lasso)
# Value of lambda.min:
cv.lasso[["lambda.min"]]
## [1] 0.04012
# Coefficients of the LASSO fit on all observations (mlasso) at the selected lambda.min:
coef(mlasso, s = cv.lasso[["lambda.min"]])
## 51 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 0.04789
## GSTM3_s .
## RP5.860F19.3_s .
## BBC3_s .
## MMP9_s 0.76907
## Contig35251_RC_s .
## Contig40831_RC_s .
## ALDH4A1_s 0.57465
## SERF1A_s .
## SCUBE2_s .
## MTDH_s .
## DCK_s -0.09092
## FLT1_s .
## PECI.1_s 0.10031
## QSCN6L1_s .
## DIAPH3_s .
## SLC2A3_s .
## GPR180_s .
## RTN4RL1_s .
## Contig32125_RC_s .
## STK32B_s .
## EXT1_s .
## COL4A2_s 0.06973
## PECI_s .
## GNAZ_s .
## AYTL2_s .
## Contig63649_RC_s 0.03684
## RAB6B_s 0.00839
## AA555029_RC_s .
## GPR126_s 0.19666
## ECT2_s 0.11207
## NUSAP1_s .
## GMPS_s .
## UCHL5_s .
## ORC6L_s .
## TSPYL5_s 0.30519
## MELK_s .
## RUNDC1_s .
## DIAPH3.1_s .
## C16orf61_s .
## TGFB3_s .
## FGF18_s .
## CDC42BPA_s .
## DTL_s .
## WISP1_s .
## DIAPH3.2_s 0.67797
## OXCT1_s .
## ZNF533_s .
## RFC4_s .
## KNTC2_s .
## FBXO31_s .
# Value of lambda.1se:
cv.lasso[["lambda.1se"]]
## [1] 0.0582
#[1] 0.0582
# Coefficients of the LASSO fit on all observations (mlasso) at the selected lambda.1se:
coef(mlasso, s = cv.lasso[["lambda.1se"]])
## 51 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 0.03773
## GSTM3_s .
## RP5.860F19.3_s .
## BBC3_s .
## MMP9_s 0.56017
## Contig35251_RC_s .
## Contig40831_RC_s .
## ALDH4A1_s 0.47934
## SERF1A_s .
## SCUBE2_s .
## MTDH_s .
## DCK_s .
## FLT1_s .
## PECI.1_s 0.04566
## QSCN6L1_s .
## DIAPH3_s .
## SLC2A3_s .
## GPR180_s .
## RTN4RL1_s .
## Contig32125_RC_s .
## STK32B_s .
## EXT1_s .
## COL4A2_s .
## PECI_s .
## GNAZ_s .
## AYTL2_s .
## Contig63649_RC_s .
## RAB6B_s .
## AA555029_RC_s .
## GPR126_s 0.16992
## ECT2_s 0.03345
## NUSAP1_s .
## GMPS_s .
## UCHL5_s .
## ORC6L_s .
## TSPYL5_s 0.21138
## MELK_s .
## RUNDC1_s .
## DIAPH3.1_s .
## C16orf61_s .
## TGFB3_s .
## FGF18_s .
## CDC42BPA_s .
## DTL_s .
## WISP1_s .
## DIAPH3.2_s 0.51886
## OXCT1_s .
## ZNF533_s .
## RFC4_s .
## KNTC2_s .
## FBXO31_s .
#Resampling methods for logistical model variable selection and validation with CMA package was performed below:
library(CMA)
library(Biobase)
library(randomForest)
# Seed the random number generator BEFORE the first random call: the original
# script shuffled the gene columns with sample() ahead of set.seed(), so the
# column order (and anything indexed by it) changed from run to run.
set.seed(321)
# Gene columns 58 to 107 of viral34_ca, taken in a random column order.
genes <- viral34_ca[, sample(58:107)]
Y <- viral34_ca$infection
dataX <- as.matrix(genes)
# Re-seed immediately before generating the learning sets, exactly where the
# original script seeded, so the CV partitions stay the same as before.
set.seed(321)
#A 5-fold CV process repeated 50 times (iterations) is performed. Function GenerateLearningsets() generates learning and testing sets for each iteration.
iterations <- 50
nfolds <- 5
CVdat <- GenerateLearningsets(y = Y, method = "CV", fold = nfolds, niter = iterations, strat = TRUE)
#Variable selection is performed with function GeneSelection() with the LASSO method with an intermediate penalty term (norm.fraction=0.5):
varsel_lasso <- GeneSelection(X = dataX, y = Y, learningsets = CVdat, method = "lasso", norm.fraction = 0.5)
## GeneSelection: iteration 1
## GeneSelection: iteration 2
## GeneSelection: iteration 3
## GeneSelection: iteration 4
## GeneSelection: iteration 5
## GeneSelection: iteration 6
## GeneSelection: iteration 7
## GeneSelection: iteration 8
## GeneSelection: iteration 9
## GeneSelection: iteration 10
## GeneSelection: iteration 11
## GeneSelection: iteration 12
## GeneSelection: iteration 13
## GeneSelection: iteration 14
## GeneSelection: iteration 15
## GeneSelection: iteration 16
## GeneSelection: iteration 17
## GeneSelection: iteration 18
## GeneSelection: iteration 19
## GeneSelection: iteration 20
## GeneSelection: iteration 21
## GeneSelection: iteration 22
## GeneSelection: iteration 23
## GeneSelection: iteration 24
## GeneSelection: iteration 25
## GeneSelection: iteration 26
## GeneSelection: iteration 27
## GeneSelection: iteration 28
## GeneSelection: iteration 29
## GeneSelection: iteration 30
## GeneSelection: iteration 31
## GeneSelection: iteration 32
## GeneSelection: iteration 33
## GeneSelection: iteration 34
## GeneSelection: iteration 35
## GeneSelection: iteration 36
## GeneSelection: iteration 37
## GeneSelection: iteration 38
## GeneSelection: iteration 39
## GeneSelection: iteration 40
## GeneSelection: iteration 41
## GeneSelection: iteration 42
## GeneSelection: iteration 43
## GeneSelection: iteration 44
## GeneSelection: iteration 45
## GeneSelection: iteration 46
## GeneSelection: iteration 47
## GeneSelection: iteration 48
## GeneSelection: iteration 49
## GeneSelection: iteration 50
## GeneSelection: iteration 51
## GeneSelection: iteration 52
## GeneSelection: iteration 53
## GeneSelection: iteration 54
## GeneSelection: iteration 55
## GeneSelection: iteration 56
## GeneSelection: iteration 57
## GeneSelection: iteration 58
## GeneSelection: iteration 59
## GeneSelection: iteration 60
## GeneSelection: iteration 61
## GeneSelection: iteration 62
## GeneSelection: iteration 63
## GeneSelection: iteration 64
## GeneSelection: iteration 65
## GeneSelection: iteration 66
## GeneSelection: iteration 67
## GeneSelection: iteration 68
## GeneSelection: iteration 69
## GeneSelection: iteration 70
## GeneSelection: iteration 71
## GeneSelection: iteration 72
## GeneSelection: iteration 73
## GeneSelection: iteration 74
## GeneSelection: iteration 75
## GeneSelection: iteration 76
## GeneSelection: iteration 77
## GeneSelection: iteration 78
## GeneSelection: iteration 79
## GeneSelection: iteration 80
## GeneSelection: iteration 81
## GeneSelection: iteration 82
## GeneSelection: iteration 83
## GeneSelection: iteration 84
## GeneSelection: iteration 85
## GeneSelection: iteration 86
## GeneSelection: iteration 87
## GeneSelection: iteration 88
## GeneSelection: iteration 89
## GeneSelection: iteration 90
## GeneSelection: iteration 91
## GeneSelection: iteration 92
## GeneSelection: iteration 93
## GeneSelection: iteration 94
## GeneSelection: iteration 95
## GeneSelection: iteration 96
## GeneSelection: iteration 97
## GeneSelection: iteration 98
## GeneSelection: iteration 99
## GeneSelection: iteration 100
## GeneSelection: iteration 101
## GeneSelection: iteration 102
## GeneSelection: iteration 103
## GeneSelection: iteration 104
## GeneSelection: iteration 105
## GeneSelection: iteration 106
## GeneSelection: iteration 107
## GeneSelection: iteration 108
## GeneSelection: iteration 109
## GeneSelection: iteration 110
## GeneSelection: iteration 111
## GeneSelection: iteration 112
## GeneSelection: iteration 113
## GeneSelection: iteration 114
## GeneSelection: iteration 115
## GeneSelection: iteration 116
## GeneSelection: iteration 117
## GeneSelection: iteration 118
## GeneSelection: iteration 119
## GeneSelection: iteration 120
## GeneSelection: iteration 121
## GeneSelection: iteration 122
## GeneSelection: iteration 123
## GeneSelection: iteration 124
## GeneSelection: iteration 125
## GeneSelection: iteration 126
## GeneSelection: iteration 127
## GeneSelection: iteration 128
## GeneSelection: iteration 129
## GeneSelection: iteration 130
## GeneSelection: iteration 131
## GeneSelection: iteration 132
## GeneSelection: iteration 133
## GeneSelection: iteration 134
## GeneSelection: iteration 135
## GeneSelection: iteration 136
## GeneSelection: iteration 137
## GeneSelection: iteration 138
## GeneSelection: iteration 139
## GeneSelection: iteration 140
## GeneSelection: iteration 141
## GeneSelection: iteration 142
## GeneSelection: iteration 143
## GeneSelection: iteration 144
## GeneSelection: iteration 145
## GeneSelection: iteration 146
## GeneSelection: iteration 147
## GeneSelection: iteration 148
## GeneSelection: iteration 149
## GeneSelection: iteration 150
## GeneSelection: iteration 151
## GeneSelection: iteration 152
## GeneSelection: iteration 153
## GeneSelection: iteration 154
## GeneSelection: iteration 155
## GeneSelection: iteration 156
## GeneSelection: iteration 157
## GeneSelection: iteration 158
## GeneSelection: iteration 159
## GeneSelection: iteration 160
## GeneSelection: iteration 161
## GeneSelection: iteration 162
## GeneSelection: iteration 163
## GeneSelection: iteration 164
## GeneSelection: iteration 165
## GeneSelection: iteration 166
## GeneSelection: iteration 167
## GeneSelection: iteration 168
## GeneSelection: iteration 169
## GeneSelection: iteration 170
## GeneSelection: iteration 171
## GeneSelection: iteration 172
## GeneSelection: iteration 173
## GeneSelection: iteration 174
## GeneSelection: iteration 175
## GeneSelection: iteration 176
## GeneSelection: iteration 177
## GeneSelection: iteration 178
## GeneSelection: iteration 179
## GeneSelection: iteration 180
## GeneSelection: iteration 181
## GeneSelection: iteration 182
## GeneSelection: iteration 183
## GeneSelection: iteration 184
## GeneSelection: iteration 185
## GeneSelection: iteration 186
## GeneSelection: iteration 187
## GeneSelection: iteration 188
## GeneSelection: iteration 189
## GeneSelection: iteration 190
## GeneSelection: iteration 191
## GeneSelection: iteration 192
## GeneSelection: iteration 193
## GeneSelection: iteration 194
## GeneSelection: iteration 195
## GeneSelection: iteration 196
## GeneSelection: iteration 197
## GeneSelection: iteration 198
## GeneSelection: iteration 199
## GeneSelection: iteration 200
## GeneSelection: iteration 201
## GeneSelection: iteration 202
## GeneSelection: iteration 203
## GeneSelection: iteration 204
## GeneSelection: iteration 205
## GeneSelection: iteration 206
## GeneSelection: iteration 207
## GeneSelection: iteration 208
## GeneSelection: iteration 209
## GeneSelection: iteration 210
## GeneSelection: iteration 211
## GeneSelection: iteration 212
## GeneSelection: iteration 213
## GeneSelection: iteration 214
## GeneSelection: iteration 215
## GeneSelection: iteration 216
## GeneSelection: iteration 217
## GeneSelection: iteration 218
## GeneSelection: iteration 219
## GeneSelection: iteration 220
## GeneSelection: iteration 221
## GeneSelection: iteration 222
## GeneSelection: iteration 223
## GeneSelection: iteration 224
## GeneSelection: iteration 225
## GeneSelection: iteration 226
## GeneSelection: iteration 227
## GeneSelection: iteration 228
## GeneSelection: iteration 229
## GeneSelection: iteration 230
## GeneSelection: iteration 231
## GeneSelection: iteration 232
## GeneSelection: iteration 233
## GeneSelection: iteration 234
## GeneSelection: iteration 235
## GeneSelection: iteration 236
## GeneSelection: iteration 237
## GeneSelection: iteration 238
## GeneSelection: iteration 239
## GeneSelection: iteration 240
## GeneSelection: iteration 241
## GeneSelection: iteration 242
## GeneSelection: iteration 243
## GeneSelection: iteration 244
## GeneSelection: iteration 245
## GeneSelection: iteration 246
## GeneSelection: iteration 247
## GeneSelection: iteration 248
## GeneSelection: iteration 249
## GeneSelection: iteration 250
# Classification with the LASSO classifier for the first 5, 10, 15 and 20 most
# selected variables; this call uses the first 5 (nbgene = 5):
class_lasso5 <- classification(
  X = dataX,
  y = Y,
  learningsets = CVdat,
  genesel = varsel_lasso,
  nbgene = 5,
  classifier = LassoCMA,
  norm.fraction = 1
)
## iteration 1
## iteration 2
## iteration 3
## iteration 4
## iteration 5
## iteration 6
## iteration 7
## iteration 8
## iteration 9
## iteration 10
## iteration 11
## iteration 12
## iteration 13
## iteration 14
## iteration 15
## iteration 16
## iteration 17
## iteration 18
## iteration 19
## iteration 20
## iteration 21
## iteration 22
## iteration 23
## iteration 24
## iteration 25
## iteration 26
## iteration 27
## iteration 28
## iteration 29
## iteration 30
## iteration 31
## iteration 32
## iteration 33
## iteration 34
## iteration 35
## iteration 36
## iteration 37
## iteration 38
## iteration 39
## iteration 40
## iteration 41
## iteration 42
## iteration 43
## iteration 44
## iteration 45
## iteration 46
## iteration 47
## iteration 48
## iteration 49
## iteration 50
## iteration 51
## iteration 52
## iteration 53
## iteration 54
## iteration 55
## iteration 56
## iteration 57
## iteration 58
## iteration 59
## iteration 60
## iteration 61
## iteration 62
## iteration 63
## iteration 64
## iteration 65
## iteration 66
## iteration 67
## iteration 68
## iteration 69
## iteration 70
## iteration 71
## iteration 72
## iteration 73
## iteration 74
## iteration 75
## iteration 76
## iteration 77
## iteration 78
## iteration 79
## iteration 80
## iteration 81
## iteration 82
## iteration 83
## iteration 84
## iteration 85
## iteration 86
## iteration 87
## iteration 88
## iteration 89
## iteration 90
## iteration 91
## iteration 92
## iteration 93
## iteration 94
## iteration 95
## iteration 96
## iteration 97
## iteration 98
## iteration 99
## iteration 100
## iteration 101
## iteration 102
## iteration 103
## iteration 104
## iteration 105
## iteration 106
## iteration 107
## iteration 108
## iteration 109
## iteration 110
## iteration 111
## iteration 112
## iteration 113
## iteration 114
## iteration 115
## iteration 116
## iteration 117
## iteration 118
## iteration 119
## iteration 120
## iteration 121
## iteration 122
## iteration 123
## iteration 124
## iteration 125
## iteration 126
## iteration 127
## iteration 128
## iteration 129
## iteration 130
## iteration 131
## iteration 132
## iteration 133
## iteration 134
## iteration 135
## iteration 136
## iteration 137
## iteration 138
## iteration 139
## iteration 140
## iteration 141
## iteration 142
## iteration 143
## iteration 144
## iteration 145
## iteration 146
## iteration 147
## iteration 148
## iteration 149
## iteration 150
## iteration 151
## iteration 152
## iteration 153
## iteration 154
## iteration 155
## iteration 156
## iteration 157
## iteration 158
## iteration 159
## iteration 160
## iteration 161
## iteration 162
## iteration 163
## iteration 164
## iteration 165
## iteration 166
## iteration 167
## iteration 168
## iteration 169
## iteration 170
## iteration 171
## iteration 172
## iteration 173
## iteration 174
## iteration 175
## iteration 176
## iteration 177
## iteration 178
## iteration 179
## iteration 180
## iteration 181
## iteration 182
## iteration 183
## iteration 184
## iteration 185
## iteration 186
## iteration 187
## iteration 188
## iteration 189
## iteration 190
## iteration 191
## iteration 192
## iteration 193
## iteration 194
## iteration 195
## iteration 196
## iteration 197
## iteration 198
## iteration 199
## iteration 200
## iteration 201
## iteration 202
## iteration 203
## iteration 204
## iteration 205
## iteration 206
## iteration 207
## iteration 208
## iteration 209
## iteration 210
## iteration 211
## iteration 212
## iteration 213
## iteration 214
## iteration 215
## iteration 216
## iteration 217
## iteration 218
## iteration 219
## iteration 220
## iteration 221
## iteration 222
## iteration 223
## iteration 224
## iteration 225
## iteration 226
## iteration 227
## iteration 228
## iteration 229
## iteration 230
## iteration 231
## iteration 232
## iteration 233
## iteration 234
## iteration 235
## iteration 236
## iteration 237
## iteration 238
## iteration 239
## iteration 240
## iteration 241
## iteration 242
## iteration 243
## iteration 244
## iteration 245
## iteration 246
## iteration 247
## iteration 248
## iteration 249
## iteration 250
# Same LASSO classification on the CVdat learning sets, using the first 10 selected variables:
class_lasso10 <- classification(
  X = dataX,
  y = Y,
  learningsets = CVdat,
  genesel = varsel_lasso,
  nbgene = 10,
  classifier = LassoCMA,
  norm.fraction = 1
)
## iteration 1
## iteration 2
## iteration 3
## iteration 4
## iteration 5
## iteration 6
## iteration 7
## iteration 8
## iteration 9
## iteration 10
## iteration 11
## iteration 12
## iteration 13
## iteration 14
## iteration 15
## iteration 16
## iteration 17
## iteration 18
## iteration 19
## iteration 20
## iteration 21
## iteration 22
## iteration 23
## iteration 24
## iteration 25
## iteration 26
## iteration 27
## iteration 28
## iteration 29
## iteration 30
## iteration 31
## iteration 32
## iteration 33
## iteration 34
## iteration 35
## iteration 36
## iteration 37
## iteration 38
## iteration 39
## iteration 40
## iteration 41
## iteration 42
## iteration 43
## iteration 44
## iteration 45
## iteration 46
## iteration 47
## iteration 48
## iteration 49
## iteration 50
## iteration 51
## iteration 52
## iteration 53
## iteration 54
## iteration 55
## iteration 56
## iteration 57
## iteration 58
## iteration 59
## iteration 60
## iteration 61
## iteration 62
## iteration 63
## iteration 64
## iteration 65
## iteration 66
## iteration 67
## iteration 68
## iteration 69
## iteration 70
## iteration 71
## iteration 72
## iteration 73
## iteration 74
## iteration 75
## iteration 76
## iteration 77
## iteration 78
## iteration 79
## iteration 80
## iteration 81
## iteration 82
## iteration 83
## iteration 84
## iteration 85
## iteration 86
## iteration 87
## iteration 88
## iteration 89
## iteration 90
## iteration 91
## iteration 92
## iteration 93
## iteration 94
## iteration 95
## iteration 96
## iteration 97
## iteration 98
## iteration 99
## iteration 100
## iteration 101
## iteration 102
## iteration 103
## iteration 104
## iteration 105
## iteration 106
## iteration 107
## iteration 108
## iteration 109
## iteration 110
## iteration 111
## iteration 112
## iteration 113
## iteration 114
## iteration 115
## iteration 116
## iteration 117
## iteration 118
## iteration 119
## iteration 120
## iteration 121
## iteration 122
## iteration 123
## iteration 124
## iteration 125
## iteration 126
## iteration 127
## iteration 128
## iteration 129
## iteration 130
## iteration 131
## iteration 132
## iteration 133
## iteration 134
## iteration 135
## iteration 136
## iteration 137
## iteration 138
## iteration 139
## iteration 140
## iteration 141
## iteration 142
## iteration 143
## iteration 144
## iteration 145
## iteration 146
## iteration 147
## iteration 148
## iteration 149
## iteration 150
## iteration 151
## iteration 152
## iteration 153
## iteration 154
## iteration 155
## iteration 156
## iteration 157
## iteration 158
## iteration 159
## iteration 160
## iteration 161
## iteration 162
## iteration 163
## iteration 164
## iteration 165
## iteration 166
## iteration 167
## iteration 168
## iteration 169
## iteration 170
## iteration 171
## iteration 172
## iteration 173
## iteration 174
## iteration 175
## iteration 176
## iteration 177
## iteration 178
## iteration 179
## iteration 180
## iteration 181
## iteration 182
## iteration 183
## iteration 184
## iteration 185
## iteration 186
## iteration 187
## iteration 188
## iteration 189
## iteration 190
## iteration 191
## iteration 192
## iteration 193
## iteration 194
## iteration 195
## iteration 196
## iteration 197
## iteration 198
## iteration 199
## iteration 200
## iteration 201
## iteration 202
## iteration 203
## iteration 204
## iteration 205
## iteration 206
## iteration 207
## iteration 208
## iteration 209
## iteration 210
## iteration 211
## iteration 212
## iteration 213
## iteration 214
## iteration 215
## iteration 216
## iteration 217
## iteration 218
## iteration 219
## iteration 220
## iteration 221
## iteration 222
## iteration 223
## iteration 224
## iteration 225
## iteration 226
## iteration 227
## iteration 228
## iteration 229
## iteration 230
## iteration 231
## iteration 232
## iteration 233
## iteration 234
## iteration 235
## iteration 236
## iteration 237
## iteration 238
## iteration 239
## iteration 240
## iteration 241
## iteration 242
## iteration 243
## iteration 244
## iteration 245
## iteration 246
## iteration 247
## iteration 248
## iteration 249
## iteration 250
# Same LASSO classification on the CVdat learning sets, using the first 15 selected variables:
class_lasso15 <- classification(
  X = dataX,
  y = Y,
  learningsets = CVdat,
  genesel = varsel_lasso,
  nbgene = 15,
  classifier = LassoCMA,
  norm.fraction = 1
)
## iteration 1
## iteration 2
## iteration 3
## iteration 4
## iteration 5
## iteration 6
## iteration 7
## iteration 8
## iteration 9
## iteration 10
## iteration 11
## iteration 12
## iteration 13
## iteration 14
## iteration 15
## iteration 16
## iteration 17
## iteration 18
## iteration 19
## iteration 20
## iteration 21
## iteration 22
## iteration 23
## iteration 24
## iteration 25
## iteration 26
## iteration 27
## iteration 28
## iteration 29
## iteration 30
## iteration 31
## iteration 32
## iteration 33
## iteration 34
## iteration 35
## iteration 36
## iteration 37
## iteration 38
## iteration 39
## iteration 40
## iteration 41
## iteration 42
## iteration 43
## iteration 44
## iteration 45
## iteration 46
## iteration 47
## iteration 48
## iteration 49
## iteration 50
## iteration 51
## iteration 52
## iteration 53
## iteration 54
## iteration 55
## iteration 56
## iteration 57
## iteration 58
## iteration 59
## iteration 60
## iteration 61
## iteration 62
## iteration 63
## iteration 64
## iteration 65
## iteration 66
## iteration 67
## iteration 68
## iteration 69
## iteration 70
## iteration 71
## iteration 72
## iteration 73
## iteration 74
## iteration 75
## iteration 76
## iteration 77
## iteration 78
## iteration 79
## iteration 80
## iteration 81
## iteration 82
## iteration 83
## iteration 84
## iteration 85
## iteration 86
## iteration 87
## iteration 88
## iteration 89
## iteration 90
## iteration 91
## iteration 92
## iteration 93
## iteration 94
## iteration 95
## iteration 96
## iteration 97
## iteration 98
## iteration 99
## iteration 100
## iteration 101
## iteration 102
## iteration 103
## iteration 104
## iteration 105
## iteration 106
## iteration 107
## iteration 108
## iteration 109
## iteration 110
## iteration 111
## iteration 112
## iteration 113
## iteration 114
## iteration 115
## iteration 116
## iteration 117
## iteration 118
## iteration 119
## iteration 120
## iteration 121
## iteration 122
## iteration 123
## iteration 124
## iteration 125
## iteration 126
## iteration 127
## iteration 128
## iteration 129
## iteration 130
## iteration 131
## iteration 132
## iteration 133
## iteration 134
## iteration 135
## iteration 136
## iteration 137
## iteration 138
## iteration 139
## iteration 140
## iteration 141
## iteration 142
## iteration 143
## iteration 144
## iteration 145
## iteration 146
## iteration 147
## iteration 148
## iteration 149
## iteration 150
## iteration 151
## iteration 152
## iteration 153
## iteration 154
## iteration 155
## iteration 156
## iteration 157
## iteration 158
## iteration 159
## iteration 160
## iteration 161
## iteration 162
## iteration 163
## iteration 164
## iteration 165
## iteration 166
## iteration 167
## iteration 168
## iteration 169
## iteration 170
## iteration 171
## iteration 172
## iteration 173
## iteration 174
## iteration 175
## iteration 176
## iteration 177
## iteration 178
## iteration 179
## iteration 180
## iteration 181
## iteration 182
## iteration 183
## iteration 184
## iteration 185
## iteration 186
## iteration 187
## iteration 188
## iteration 189
## iteration 190
## iteration 191
## iteration 192
## iteration 193
## iteration 194
## iteration 195
## iteration 196
## iteration 197
## iteration 198
## iteration 199
## iteration 200
## iteration 201
## iteration 202
## iteration 203
## iteration 204
## iteration 205
## iteration 206
## iteration 207
## iteration 208
## iteration 209
## iteration 210
## iteration 211
## iteration 212
## iteration 213
## iteration 214
## iteration 215
## iteration 216
## iteration 217
## iteration 218
## iteration 219
## iteration 220
## iteration 221
## iteration 222
## iteration 223
## iteration 224
## iteration 225
## iteration 226
## iteration 227
## iteration 228
## iteration 229
## iteration 230
## iteration 231
## iteration 232
## iteration 233
## iteration 234
## iteration 235
## iteration 236
## iteration 237
## iteration 238
## iteration 239
## iteration 240
## iteration 241
## iteration 242
## iteration 243
## iteration 244
## iteration 245
## iteration 246
## iteration 247
## iteration 248
## iteration 249
## iteration 250
# LASSO classifier fitted on every learning set in CVdat, keeping the 20
# best-ranked variables from varsel_lasso.
class_lasso20 <- classification(
  X = dataX,
  y = Y,
  learningsets = CVdat,
  genesel = varsel_lasso,
  nbgene = 20,
  classifier = LassoCMA,
  norm.fraction = 1
)
## iteration 1
## iteration 2
## iteration 3
## iteration 4
## iteration 5
## iteration 6
## iteration 7
## iteration 8
## iteration 9
## iteration 10
## iteration 11
## iteration 12
## iteration 13
## iteration 14
## iteration 15
## iteration 16
## iteration 17
## iteration 18
## iteration 19
## iteration 20
## iteration 21
## iteration 22
## iteration 23
## iteration 24
## iteration 25
## iteration 26
## iteration 27
## iteration 28
## iteration 29
## iteration 30
## iteration 31
## iteration 32
## iteration 33
## iteration 34
## iteration 35
## iteration 36
## iteration 37
## iteration 38
## iteration 39
## iteration 40
## iteration 41
## iteration 42
## iteration 43
## iteration 44
## iteration 45
## iteration 46
## iteration 47
## iteration 48
## iteration 49
## iteration 50
## iteration 51
## iteration 52
## iteration 53
## iteration 54
## iteration 55
## iteration 56
## iteration 57
## iteration 58
## iteration 59
## iteration 60
## iteration 61
## iteration 62
## iteration 63
## iteration 64
## iteration 65
## iteration 66
## iteration 67
## iteration 68
## iteration 69
## iteration 70
## iteration 71
## iteration 72
## iteration 73
## iteration 74
## iteration 75
## iteration 76
## iteration 77
## iteration 78
## iteration 79
## iteration 80
## iteration 81
## iteration 82
## iteration 83
## iteration 84
## iteration 85
## iteration 86
## iteration 87
## iteration 88
## iteration 89
## iteration 90
## iteration 91
## iteration 92
## iteration 93
## iteration 94
## iteration 95
## iteration 96
## iteration 97
## iteration 98
## iteration 99
## iteration 100
## iteration 101
## iteration 102
## iteration 103
## iteration 104
## iteration 105
## iteration 106
## iteration 107
## iteration 108
## iteration 109
## iteration 110
## iteration 111
## iteration 112
## iteration 113
## iteration 114
## iteration 115
## iteration 116
## iteration 117
## iteration 118
## iteration 119
## iteration 120
## iteration 121
## iteration 122
## iteration 123
## iteration 124
## iteration 125
## iteration 126
## iteration 127
## iteration 128
## iteration 129
## iteration 130
## iteration 131
## iteration 132
## iteration 133
## iteration 134
## iteration 135
## iteration 136
## iteration 137
## iteration 138
## iteration 139
## iteration 140
## iteration 141
## iteration 142
## iteration 143
## iteration 144
## iteration 145
## iteration 146
## iteration 147
## iteration 148
## iteration 149
## iteration 150
## iteration 151
## iteration 152
## iteration 153
## iteration 154
## iteration 155
## iteration 156
## iteration 157
## iteration 158
## iteration 159
## iteration 160
## iteration 161
## iteration 162
## iteration 163
## iteration 164
## iteration 165
## iteration 166
## iteration 167
## iteration 168
## iteration 169
## iteration 170
## iteration 171
## iteration 172
## iteration 173
## iteration 174
## iteration 175
## iteration 176
## iteration 177
## iteration 178
## iteration 179
## iteration 180
## iteration 181
## iteration 182
## iteration 183
## iteration 184
## iteration 185
## iteration 186
## iteration 187
## iteration 188
## iteration 189
## iteration 190
## iteration 191
## iteration 192
## iteration 193
## iteration 194
## iteration 195
## iteration 196
## iteration 197
## iteration 198
## iteration 199
## iteration 200
## iteration 201
## iteration 202
## iteration 203
## iteration 204
## iteration 205
## iteration 206
## iteration 207
## iteration 208
## iteration 209
## iteration 210
## iteration 211
## iteration 212
## iteration 213
## iteration 214
## iteration 215
## iteration 216
## iteration 217
## iteration 218
## iteration 219
## iteration 220
## iteration 221
## iteration 222
## iteration 223
## iteration 224
## iteration 225
## iteration 226
## iteration 227
## iteration 228
## iteration 229
## iteration 230
## iteration 231
## iteration 232
## iteration 233
## iteration 234
## iteration 235
## iteration 236
## iteration 237
## iteration 238
## iteration 239
## iteration 240
## iteration 241
## iteration 242
## iteration 243
## iteration 244
## iteration 245
## iteration 246
## iteration 247
## iteration 248
## iteration 249
## iteration 250
# Collect the LASSO fits with 5, 10, 15 and 20 variables (in that order) so
# that they can be evaluated side by side.
result_list <- list(class_lasso5, class_lasso10, class_lasso15, class_lasso20)
# Classification accuracy is compared:
# `plot = FALSE` is written out in full: `F` is an ordinary variable that can
# be reassigned, whereas `FALSE` is a reserved word.
comparison_lasso <- compare(
  result_list,
  plot = FALSE,
  measure = c("misclassification", "auc")
)
print(comparison_lasso)
## misclassification auc
## Lasso 0.3013 0.7448
## Lasso2 0.2894 0.7681
## Lasso3 0.3149 0.7452
## Lasso4 0.3299 0.7196
# misclassification auc
# Lasso 0.3011429 0.7456723
# Lasso2 0.2908571 0.7672543
# Lasso3 0.3134286 0.7465282
# Lasso4 0.3298571 0.7196486
#Based on results, the method Lasso2 with the best classification accuracy (maximum AUC=0.7672543) is the one with 10 variables.
#Thus, we print the 10 most selected variables in the iterative process, and this is the model proposed:
# Count how often each variable appears among the `ntop` best-ranked variables
# over all resampling iterations, then report the `ntop` most frequently
# selected variables together with their selection frequency and percentage.
ntop <- 10
# Gather the top-`ntop` variable indices of every iteration in one vector.
# seq_len() is safe when `iterations` is 0, and lapply() avoids growing the
# vector with c() inside a loop.
seliter <- unlist(lapply(
  seq_len(iterations),
  function(i) toplist(varsel_lasso, iter = i, top = ntop, show = FALSE)$index
))
selected_lasso <- sort(table(seliter), decreasing = TRUE)
# Never index past the number of distinct variables that were selected.
top_counts <- selected_lasso[seq_len(min(ntop, length(selected_lasso)))]
index_lasso <- as.numeric(names(top_counts))
# as.vector() strips the "table" class. Passing a table to data.frame()
# expands it into two columns (level and count), which produced five columns
# and left the three column names below misaligned.
topselection_lasso <- data.frame(
  colnames(dataX)[index_lasso],
  as.vector(top_counts),
  100 * as.vector(top_counts) / iterations
)
colnames(topselection_lasso) <- c("variable", "frequency of selection", "percentage of selection")
topselection_lasso
## variable frequency of selection percentage of selection NA NA
## 1 DIAPH3.2_s 35 49 35 98
## 2 MMP9_s 3 42 3 84
## 3 ALDH4A1_s 6 33 6 66
## 4 GNAZ_s 49 33 49 66
## 5 AYTL2_s 9 32 9 64
## 6 CDC42BPA_s 17 27 17 54
## 7 Contig63649_RC_s 42 22 42 44
## 8 DTL_s 23 21 23 42
## 9 TSPYL5_s 10 18 10 36
## 10 PECI_s 34 18 34 36
#The following is the list of selected variables for my fitted logistic model using LASSO:
# variable frequency of selection percentage of selection NA NA
# 1 DIAPH3.2_s 5 49 5 98
# 2 MMP9_s 24 42 24 84
# 3 ALDH4A1_s 25 33 25 66
# 4 GNAZ_s 21 32 21 64
# 5 AYTL2_s 48 32 48 64
# 6 CDC42BPA_s 22 28 22 56
# 7 Contig63649_RC_s 27 22 27 44
# 8 DTL_s 40 21 40 42
# 9 TSPYL5_s 3 18 3 36
# 10 PECI_s 41 18 41 36
# QUESTION 16: Obtain Kaplan-Meier survival curves for the time of symptoms as a function of the kind of infection and test for the significance of the difference in duration of symptoms. Discuss the results.
#BACKGROUND: In survival analysis the outcome of interest requires information on two variables, a time variable and an indicator variable.
#The indicator variable is 1 when the event of interest has occurred and 0 otherwise. These two variables are specified together with the function Surv(,)
#and this object is used as the outcome in the analysis.
#stime: Time with symptoms (days).
#sind: Indicator of symptoms: (1 = symptoms finished; 0 = symptoms remain)
#hosp: Indicator of hospitalization risk event (1= hospitalization, 0 = no hospitalization) : THIS WILL BE EVALUATED ADDITIONALLY
#Function survfit() applied to a survival object Surv(,)provides tables and plots of Kaplan-Meier survival curves.
#Kaplan-Meier curves for the time of symptoms.
# Overall Kaplan-Meier estimate of the time until symptoms finish.
# The status is passed as a logical event indicator (TRUE = symptoms finished).
# Passing `sind` directly made Surv() build a multi-state object: the earlier
# rendered summary shows "Pr((s0))" / "Pr(symptoms_finished)" columns rather
# than a survival column, which is what Surv() does for a factor status. That
# fit is not a right-censored Kaplan-Meier curve.
# NOTE(review): the level name "symptoms_finished" is taken from that summary
# header - confirm against levels(viral34_ca$sind).
kmcurve1 <- survfit(
  Surv(viral34_ca$stime, viral34_ca$sind == "symptoms_finished") ~ 1
)
summary(kmcurve1)
## Call: survfit(formula = Surv(viral34_ca$stime, viral34_ca$sind) ~ 1)
##
## time n.risk n.event Pr((s0)) Pr(symptoms_finished)
## 0.353 139 1 0.993 0.00719
## 0.649 138 1 0.986 0.01439
## 0.936 137 1 0.978 0.02158
## 0.961 136 1 0.971 0.02878
## 1.210 135 1 0.964 0.03597
## 1.388 134 1 0.957 0.04317
## 1.500 133 1 0.950 0.05036
## 1.610 132 1 0.942 0.05755
## 1.613 131 1 0.935 0.06475
## 1.717 130 1 0.928 0.07194
## 1.733 129 1 0.921 0.07914
## 1.947 128 1 0.914 0.08633
## 1.966 127 1 0.906 0.09353
## 1.974 126 1 0.899 0.10072
## 2.223 125 1 0.892 0.10791
## 2.297 124 1 0.885 0.11511
## 2.335 123 1 0.878 0.12230
## 2.341 122 1 0.871 0.12950
## 2.615 120 1 0.863 0.13675
## 2.680 119 1 0.856 0.14400
## 2.697 118 1 0.849 0.15126
## 2.812 117 1 0.841 0.15851
## 2.853 116 1 0.834 0.16577
## 3.121 115 1 0.827 0.17302
## 3.220 114 1 0.820 0.18028
## 3.420 112 1 0.812 0.18759
## 3.439 111 1 0.805 0.19491
## 3.655 110 1 0.798 0.20223
## 3.915 109 1 0.790 0.20955
## 4.219 108 1 0.783 0.21687
## 4.446 107 1 0.776 0.22419
## 4.621 106 1 0.768 0.23151
## 4.972 104 1 0.761 0.23890
## 5.117 101 1 0.754 0.24643
## 6.565 79 1 0.744 0.25597
## 6.995 70 1 0.733 0.26660
## 8.129 56 1 0.720 0.27970
## 8.304 53 1 0.707 0.29329
## 8.528 52 1 0.693 0.30688
## 8.561 51 1 0.680 0.32047
## 8.925 47 1 0.665 0.33493
## 8.988 46 1 0.651 0.34939
## 9.999 36 1 0.633 0.36746
## 11.211 25 1 0.607 0.39276
## 11.740 19 1 0.575 0.42472
## 12.465 16 1 0.539 0.46068
## 14.012 9 1 0.479 0.52060
# Draw the overall curve stored in kmcurve1 with a title and axis labels.
plot(
  kmcurve1,
  xlab = "time",
  ylab = "survival function",
  main = "Kaplan-Meier estimate with 95% confidence bounds"
)
# #Kaplan-Meier curves for the time of symptoms.
# kmcurve2<-survfit(Surv(viral34_ca$stime,viral34_ca$hosp)~ 1)
# summary(kmcurve2)
# plot(kmcurve2, main="Kaplan-Meier estimate with 95% confidence bounds", xlab="time", ylab="survival function")
# #"KM curve for hospitalization or no hospitalization
#Kaplan-Meier curves for the time of symptoms for the two levels of infection
# Kaplan-Meier estimates of the time until symptoms finish, one curve per
# infection type.
# The status is passed as a logical event indicator (TRUE = symptoms finished).
# Passing `sind` directly made Surv() build a multi-state object: the earlier
# rendered summary shows "Pr((s0))" / "Pr(symptoms_finished)" columns, which
# is what Surv() does for a factor status. Such an object is not a
# right-censored survival fit and cannot be used with survdiff().
# NOTE(review): the level name "symptoms_finished" is taken from that summary
# header - confirm against levels(viral34_ca$sind).
kmcurve3 <- survfit(
  Surv(viral34_ca$stime, viral34_ca$sind == "symptoms_finished") ~
    viral34_ca$infection
)
summary(kmcurve3)
## Call: survfit(formula = Surv(viral34_ca$stime, viral34_ca$sind) ~ viral34_ca$infection)
##
## viral34_ca$infection=bacterial_infection
## time n.risk n.event Pr((s0)) Pr(symptoms_finished)
## 0.353 68 1 0.985 0.0147
## 0.649 67 1 0.971 0.0294
## 0.936 66 1 0.956 0.0441
## 1.210 65 1 0.941 0.0588
## 1.613 64 1 0.926 0.0735
## 1.717 63 1 0.912 0.0882
## 1.733 62 1 0.897 0.1029
## 1.947 61 1 0.882 0.1176
## 2.297 60 1 0.868 0.1324
## 2.335 59 1 0.853 0.1471
## 2.615 57 1 0.838 0.1620
## 2.812 56 1 0.823 0.1770
## 3.655 54 1 0.808 0.1922
## 4.219 53 1 0.793 0.2075
## 4.446 52 1 0.777 0.2227
## 5.117 51 1 0.762 0.2380
## 6.995 36 1 0.741 0.2591
## 8.129 29 1 0.715 0.2847
## 8.304 26 1 0.688 0.3122
## 8.528 25 1 0.660 0.3397
## 8.925 22 1 0.630 0.3697
## 8.988 21 1 0.600 0.3997
## 11.211 14 1 0.557 0.4426
## 11.740 11 1 0.507 0.4933
##
## viral34_ca$infection=viral_infection
## time n.risk n.event Pr((s0)) Pr(symptoms_finished)
## 0.961 71 1 0.986 0.0141
## 1.388 70 1 0.972 0.0282
## 1.500 69 1 0.958 0.0423
## 1.610 68 1 0.944 0.0563
## 1.966 67 1 0.930 0.0704
## 1.974 66 1 0.915 0.0845
## 2.223 65 1 0.901 0.0986
## 2.341 64 1 0.887 0.1127
## 2.680 63 1 0.873 0.1268
## 2.697 62 1 0.859 0.1408
## 2.853 61 1 0.845 0.1549
## 3.121 60 1 0.831 0.1690
## 3.220 59 1 0.817 0.1831
## 3.420 58 1 0.803 0.1972
## 3.439 57 1 0.789 0.2113
## 3.915 56 1 0.775 0.2254
## 4.621 55 1 0.761 0.2394
## 4.972 53 1 0.746 0.2538
## 6.565 41 1 0.728 0.2720
## 8.561 27 1 0.701 0.2990
## 9.999 19 1 0.664 0.3358
## 12.465 7 1 0.569 0.4307
## 14.012 6 1 0.474 0.5256
# Draw one curve per infection type using palette colours 2 and 3, and add a
# legend with the same colours; labels are in the order the groups appear in
# the summary of kmcurve3 (bacterial first, viral second).
plot(
  kmcurve3,
  col = 2:3
)
legend(
  "topright",
  legend = c("bacterial_infection", "viral_infection"),
  col = 2:3,
  lty = 1
)
# #Kaplan-Meier curves for the time of symptoms for the two levels of infection (1:2)
# kmcurve4<-survfit(Surv(viral34_ca$stime,viral34_ca$hosp)~ viral34_ca$infection)
# summary(kmcurve4)
# plot(kmcurve4, main="KM curve for for hospitalization or no hospitalization", col=(2:3))
# legend("topright",col=2:3, legend=c("bacterial_Infection","viral_Infection"), lty=1)
#Kaplan-Meier curves describe and summarize the survival times: estimation and interpretation of survivor and/or hazard functions from survival data (Kaplan-Meier
#estimator). The Kaplan and Meier (K-M) estimator of the survivor function is a step function with
#jumps at the observed event times. Based on the curves, it is apparent that the mean and median survival times
#for viral and bacterial infection "look" similar.
#The log-rank test is used to confirm if two survival curves are statistically different by testing following hypothesis:
#The null hypothesis is H0: S1(t) =S2(t), for all t > 0
#The alternative hypothesis is: H1: S1(t) ≠ S2(t), for some t > 0
#Performing the log-rank test for equality of two survival functions according to type of infection
#survdiff(Surv(viral34_ca$stime,viral34_ca$sind)~viral34_ca$infection)
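#Error in survdiff(Surv(viral34_ca$stime, viral34_ca$sind) ~ viral34_ca$infection):Right censored data only
#NOTE: survdiff() only accepts right-censored survival objects. This error is consistent with sind being a factor, which makes Surv() build a multi-state
#object (see the "Pr((s0))" columns in the summaries above). Passing a 0/1 or logical event indicator instead, e.g.
#Surv(viral34_ca$stime, viral34_ca$sind == "symptoms_finished"), should allow the log-rank test to run.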
# #Performing the log-rank test for equality of two survival functions according to type of infection
# survdiff(Surv(viral34_ca$stime,viral34_ca$hosp)~viral34_ca$infection)
# #Error in survdiff(Surv(viral34_c$stime, viral34_c$hosp) ~ viral34_c$infection) : Right censored data only
# QUESTION 17: Perform a Cox regression model for duration of symptoms as a function of the covariates (ignore gene expression levels). Discuss the results.
#BACKGROUND: The Cox PH model is the most commonly used regression model for a survival time.
#The Cox model specifies the hazard at time t for an individual with covariates (e.g. infection type) x1
#coxmodel1<-coxph(Surv(viral34_ca$stime,viral34_ca$sind)~ viral34_ca$infection+viral34_ca$sind+viral34_ca$gender+viral34_ca$age+viral34_ca$ancestry)
#coxmodel1
#Running with hospitalization categorical variable:
#coxmodel2<-coxph(Surv(viral34_ca$stime,viral34_ca$hosp)~ viral34_ca$infection+viral34_ca$sind+viral34_ca$gender+viral34_ca$age+viral34_ca$ancestry)
#coxmodel2
#Error in coxph(Surv(viral34_c$stime, viral34_c$hosp) ~ viral34_c$infection + : an id statement is required for multi-state models
#Error in coxph(Surv(viral34_ca$stime, viral34_ca$hosp) ~ viral34_ca$infection + :
# an id statement is required for multi-state models
#cox<-survfit(coxmodel1)
#cox
#plot(cox)
#Cox diagnostics
#1. Non-overlapping survival curves
#2. log(-log(Surv)) approximately parallel lines
#plot(log(-log(kmcurve3$surv)))
#No strata(covariate) + logwbc was added to the original list of covariates
# No stratified Cox model was plotted